1 /* 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 /* 26 ******************************************************************************* 27 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * 28 * * 29 * The original version of this source code and documentation is copyrighted * 30 * and owned by IBM, These materials are provided under terms of a License * 31 * Agreement between IBM and Sun. This technology is protected by multiple * 32 * US and International patents. This notice and attribution to IBM may not * 33 * to removed. 
* 34 ******************************************************************************* 35 */ 36 37 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 38 * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre 39 * concept of RUNS_ONLY which is a double operation. 40 * It could be advantageous to divide this into 3 concepts: 41 * a) Operation: direct / inverse / RUNS_ONLY 42 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L 43 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 44 * This would allow combinations not possible today like RUNS_ONLY with 45 * NUMBERS_SPECIAL. 46 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 47 * REMOVE_CONTROLS for the inverse step. 48 * Not all combinations would be supported, and probably not all do make sense. 49 * This would need to document which ones are supported and what are the 50 * fallbacks for unsupported combinations. 51 */ 52 53 package sun.text.bidi; 54 55 import java.io.IOException; 56 import java.lang.reflect.Array; 57 import java.lang.reflect.Field; 58 import java.lang.reflect.Method; 59 import java.lang.reflect.InvocationTargetException; 60 import java.text.AttributedCharacterIterator; 61 import java.text.Bidi; 62 import java.util.Arrays; 63 import java.util.MissingResourceException; 64 import sun.text.normalizer.UBiDiProps; 65 import sun.text.normalizer.UCharacter; 66 import sun.text.normalizer.UTF16; 67 68 /** 69 * 70 * <h2>Bidi algorithm for ICU</h2> 71 * 72 * This is an implementation of the Unicode Bidirectional algorithm. The 73 * algorithm is defined in the <a 74 * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 75 * version 13, also described in The Unicode Standard, Version 4.0 . 76 * <p> 77 * 78 * Note: Libraries that perform a bidirectional algorithm and reorder strings 79 * accordingly are sometimes called "Storage Layout Engines". 
ICU's Bidi and 80 * shaping (ArabicShaping) classes can be used at the core of such "Storage 81 * Layout Engines". 82 * 83 * <h3>General remarks about the API:</h3> 84 * 85 * The "limit" of a sequence of characters is the position just after 86 * their last character, i.e., one more than that position. 87 * <p> 88 * 89 * Some of the API methods provide access to "runs". Such a 90 * "run" is defined as a sequence of characters that are at the same 91 * embedding level after performing the Bidi algorithm. 92 * <p> 93 * 94 * <h3>Basic concept: paragraph</h3> 95 * A piece of text can be divided into several paragraphs by characters 96 * with the Bidi class <code>Block Separator</code>. For handling of 97 * paragraphs, see: 98 * <ul> 99 * <li>{@link #countParagraphs} 100 * <li>{@link #getParaLevel} 101 * <li>{@link #getParagraph} 102 * <li>{@link #getParagraphByIndex} 103 * </ul> 104 * 105 * <h3>Basic concept: text direction</h3> 106 * The direction of a piece of text may be: 107 * <ul> 108 * <li>{@link #LTR} 109 * <li>{@link #RTL} 110 * <li>{@link #MIXED} 111 * </ul> 112 * 113 * <h3>Basic concept: levels</h3> 114 * 115 * Levels in this API represent embedding levels according to the Unicode 116 * Bidirectional Algorithm. 117 * Their low-order bit (even/odd value) indicates the visual direction.<p> 118 * 119 * Levels can be abstract values when used for the 120 * <code>paraLevel</code> and <code>embeddingLevels</code> 121 * arguments of <code>setPara()</code>; there: 122 * <ul> 123 * <li>the high-order bit of an <code>embeddingLevels[]</code> 124 * value indicates whether the using application is 125 * specifying the level of a character to <i>override</i> whatever the 126 * Bidi implementation would resolve it to.</li> 127 * <li><code>paraLevel</code> can be set to the 128 * pseudo-level values <code>LEVEL_DEFAULT_LTR</code> 129 * and <code>LEVEL_DEFAULT_RTL</code>.</li> 130 * </ul> 131 * 132 * <p>The related constants are not real, valid level values. 
133 * <code>DEFAULT_XXX</code> can be used to specify 134 * a default for the paragraph level for 135 * when the <code>setPara()</code> method 136 * shall determine it but there is no 137 * strongly typed character in the input.<p> 138 * 139 * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even 140 * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd, 141 * just like with normal LTR and RTL level values - 142 * these special values are designed that way. Also, the implementation 143 * assumes that MAX_EXPLICIT_LEVEL is odd. 144 * 145 * <ul><b>See Also:</b> 146 * <li>{@link #LEVEL_DEFAULT_LTR} 147 * <li>{@link #LEVEL_DEFAULT_RTL} 148 * <li>{@link #LEVEL_OVERRIDE} 149 * <li>{@link #MAX_EXPLICIT_LEVEL} 150 * <li>{@link #setPara} 151 * </ul> 152 * 153 * <h3>Basic concept: Reordering Mode</h3> 154 * Reordering mode values indicate which variant of the Bidi algorithm to 155 * use. 156 * 157 * <ul><b>See Also:</b> 158 * <li>{@link #setReorderingMode} 159 * <li>{@link #REORDER_DEFAULT} 160 * <li>{@link #REORDER_NUMBERS_SPECIAL} 161 * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R} 162 * <li>{@link #REORDER_RUNS_ONLY} 163 * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L} 164 * <li>{@link #REORDER_INVERSE_LIKE_DIRECT} 165 * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL} 166 * </ul> 167 * 168 * <h3>Basic concept: Reordering Options</h3> 169 * Reordering options can be applied during Bidi text transformations. 170 * <ul><b>See Also:</b> 171 * <li>{@link #setReorderingOptions} 172 * <li>{@link #OPTION_DEFAULT} 173 * <li>{@link #OPTION_INSERT_MARKS} 174 * <li>{@link #OPTION_REMOVE_CONTROLS} 175 * <li>{@link #OPTION_STREAMING} 176 * </ul> 177 * 178 * 179 * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. 
Scherer) 180 * @stable ICU 3.8 181 * 182 * 183 * <h4> Sample code for the ICU Bidi API </h4> 184 * 185 * <h5>Rendering a paragraph with the ICU Bidi API</h5> 186 * 187 * This is (hypothetical) sample code that illustrates how the ICU Bidi API 188 * could be used to render a paragraph of text. Rendering code depends highly on 189 * the graphics system, therefore this sample code must make a lot of 190 * assumptions, which may or may not match any existing graphics system's 191 * properties. 192 * 193 * <p> 194 * The basic assumptions are: 195 * </p> 196 * <ul> 197 * <li>Rendering is done from left to right on a horizontal line.</li> 198 * <li>A run of single-style, unidirectional text can be rendered at once. 199 * </li> 200 * <li>Such a run of text is passed to the graphics system with characters 201 * (code units) in logical order.</li> 202 * <li>The line-breaking algorithm is very complicated and Locale-dependent - 203 * and therefore its implementation omitted from this sample code.</li> 204 * </ul> 205 * 206 * <pre> 207 * 208 * package com.ibm.icu.dev.test.bidi; 209 * 210 * import com.ibm.icu.text.Bidi; 211 * import com.ibm.icu.text.BidiRun; 212 * 213 * public class Sample { 214 * 215 * static final int styleNormal = 0; 216 * static final int styleSelected = 1; 217 * static final int styleBold = 2; 218 * static final int styleItalics = 4; 219 * static final int styleSuper=8; 220 * static final int styleSub = 16; 221 * 222 * static class StyleRun { 223 * int limit; 224 * int style; 225 * 226 * public StyleRun(int limit, int style) { 227 * this.limit = limit; 228 * this.style = style; 229 * } 230 * } 231 * 232 * static class Bounds { 233 * int start; 234 * int limit; 235 * 236 * public Bounds(int start, int limit) { 237 * this.start = start; 238 * this.limit = limit; 239 * } 240 * } 241 * 242 * static int getTextWidth(String text, int start, int limit, 243 * StyleRun[] styleRuns, int styleRunCount) { 244 * // simplistic way to compute the width 245 * return limit 
- start; 246 * } 247 * 248 * // set limit and StyleRun limit for a line 249 * // from text[start] and from styleRuns[styleRunStart] 250 * // using Bidi.getLogicalRun(...) 251 * // returns line width 252 * static int getLineBreak(String text, Bounds line, Bidi para, 253 * StyleRun styleRuns[], Bounds styleRun) { 254 * // dummy return 255 * return 0; 256 * } 257 * 258 * // render runs on a line sequentially, always from left to right 259 * 260 * // prepare rendering a new line 261 * static void startLine(byte textDirection, int lineWidth) { 262 * System.out.println(); 263 * } 264 * 265 * // render a run of text and advance to the right by the run width 266 * // the text[start..limit-1] is always in logical order 267 * static void renderRun(String text, int start, int limit, 268 * byte textDirection, int style) { 269 * } 270 * 271 * // We could compute a cross-product 272 * // from the style runs with the directional runs 273 * // and then reorder it. 274 * // Instead, here we iterate over each run type 275 * // and render the intersections - 276 * // with shortcuts in simple (and common) cases. 277 * // renderParagraph() is the main function. 
278 * 279 * // render a directional run with 280 * // (possibly) multiple style runs intersecting with it 281 * static void renderDirectionalRun(String text, int start, int limit, 282 * byte direction, StyleRun styleRuns[], 283 * int styleRunCount) { 284 * int i; 285 * 286 * // iterate over style runs 287 * if (direction == Bidi.LTR) { 288 * int styleLimit; 289 * for (i = 0; i < styleRunCount; ++i) { 290 * styleLimit = styleRuns[i].limit; 291 * if (start < styleLimit) { 292 * if (styleLimit > limit) { 293 * styleLimit = limit; 294 * } 295 * renderRun(text, start, styleLimit, 296 * direction, styleRuns[i].style); 297 * if (styleLimit == limit) { 298 * break; 299 * } 300 * start = styleLimit; 301 * } 302 * } 303 * } else { 304 * int styleStart; 305 * 306 * for (i = styleRunCount-1; i >= 0; --i) { 307 * if (i > 0) { 308 * styleStart = styleRuns[i-1].limit; 309 * } else { 310 * styleStart = 0; 311 * } 312 * if (limit >= styleStart) { 313 * if (styleStart < start) { 314 * styleStart = start; 315 * } 316 * renderRun(text, styleStart, limit, direction, 317 * styleRuns[i].style); 318 * if (styleStart == start) { 319 * break; 320 * } 321 * limit = styleStart; 322 * } 323 * } 324 * } 325 * } 326 * 327 * // the line object represents text[start..limit-1] 328 * static void renderLine(Bidi line, String text, int start, int limit, 329 * StyleRun styleRuns[], int styleRunCount) { 330 * byte direction = line.getDirection(); 331 * if (direction != Bidi.MIXED) { 332 * // unidirectional 333 * if (styleRunCount <= 1) { 334 * renderRun(text, start, limit, direction, styleRuns[0].style); 335 * } else { 336 * renderDirectionalRun(text, start, limit, direction, 337 * styleRuns, styleRunCount); 338 * } 339 * } else { 340 * // mixed-directional 341 * int count, i; 342 * BidiRun run; 343 * 344 * try { 345 * count = line.countRuns(); 346 * } catch (IllegalStateException e) { 347 * e.printStackTrace(); 348 * return; 349 * } 350 * if (styleRunCount <= 1) { 351 * int style = styleRuns[0].style; 
352 * 353 * // iterate over directional runs 354 * for (i = 0; i < count; ++i) { 355 * run = line.getVisualRun(i); 356 * renderRun(text, run.getStart(), run.getLimit(), 357 * run.getDirection(), style); 358 * } 359 * } else { 360 * // iterate over both directional and style runs 361 * for (i = 0; i < count; ++i) { 362 * run = line.getVisualRun(i); 363 * renderDirectionalRun(text, run.getStart(), 364 * run.getLimit(), run.getDirection(), 365 * styleRuns, styleRunCount); 366 * } 367 * } 368 * } 369 * } 370 * 371 * static void renderParagraph(String text, byte textDirection, 372 * StyleRun styleRuns[], int styleRunCount, 373 * int lineWidth) { 374 * int length = text.length(); 375 * Bidi para = new Bidi(); 376 * try { 377 * para.setPara(text, 378 * textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL 379 * : Bidi.LEVEL_DEFAULT_LTR, 380 * null); 381 * } catch (Exception e) { 382 * e.printStackTrace(); 383 * return; 384 * } 385 * byte paraLevel = (byte)(1 & para.getParaLevel()); 386 * StyleRun styleRun = new StyleRun(length, styleNormal); 387 * 388 * if (styleRuns == null || styleRunCount <= 0) { 389 * styleRuns = new StyleRun[1]; 390 * styleRunCount = 1; 391 * styleRuns[0] = styleRun; 392 * } 393 * // assume styleRuns[styleRunCount-1].limit>=length 394 * 395 * int width = getTextWidth(text, 0, length, styleRuns, styleRunCount); 396 * if (width <= lineWidth) { 397 * // everything fits onto one line 398 * 399 * // prepare rendering a new line from either left or right 400 * startLine(paraLevel, width); 401 * 402 * renderLine(para, text, 0, length, styleRuns, styleRunCount); 403 * } else { 404 * // we need to render several lines 405 * Bidi line = new Bidi(length, 0); 406 * int start = 0, limit; 407 * int styleRunStart = 0, styleRunLimit; 408 * 409 * for (;;) { 410 * limit = length; 411 * styleRunLimit = styleRunCount; 412 * width = getLineBreak(text, new Bounds(start, limit), 413 * para, styleRuns, 414 * new Bounds(styleRunStart, styleRunLimit)); 415 * try { 416 * line = 
para.setLine(start, limit); 417 * } catch (Exception e) { 418 * e.printStackTrace(); 419 * return; 420 * } 421 * // prepare rendering a new line 422 * // from either left or right 423 * startLine(paraLevel, width); 424 * 425 * if (styleRunStart > 0) { 426 * int newRunCount = styleRuns.length - styleRunStart; 427 * StyleRun[] newRuns = new StyleRun[newRunCount]; 428 * System.arraycopy(styleRuns, styleRunStart, newRuns, 0, 429 * newRunCount); 430 * renderLine(line, text, start, limit, newRuns, 431 * styleRunLimit - styleRunStart); 432 * } else { 433 * renderLine(line, text, start, limit, styleRuns, 434 * styleRunLimit - styleRunStart); 435 * } 436 * if (limit == length) { 437 * break; 438 * } 439 * start = limit; 440 * styleRunStart = styleRunLimit - 1; 441 * if (start >= styleRuns[styleRunStart].limit) { 442 * ++styleRunStart; 443 * } 444 * } 445 * } 446 * } 447 * 448 * public static void main(String[] args) 449 * { 450 * renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80); 451 * renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60); 452 * } 453 * } 454 * 455 * </pre> 456 */ 457 458 public class BidiBase { 459 460 class Point { 461 int pos; /* position in text */ 462 int flag; /* flag for LRM/RLM, before/after */ 463 } 464 465 class InsertPoints { 466 int size; 467 int confirmed; 468 Point[] points = new Point[0]; 469 } 470 471 /** Paragraph level setting<p> 472 * 473 * Constant indicating that the base direction depends on the first strong 474 * directional character in the text according to the Unicode Bidirectional 475 * Algorithm. 
If no strong directional character is present, 476 * then set the paragraph level to 0 (left-to-right).<p> 477 * 478 * If this value is used in conjunction with reordering modes 479 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 480 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 481 * is assumed to be visual LTR, and the text after reordering is required 482 * to be the corresponding logical string with appropriate contextual 483 * direction. The direction of the result string will be RTL if either 484 * the righmost or leftmost strong character of the source text is RTL 485 * or Arabic Letter, the direction will be LTR otherwise.<p> 486 * 487 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 488 * be added at the beginning of the result string to ensure round trip 489 * (that the result string, when reordered back to visual, will produce 490 * the original source text). 491 * @see #REORDER_INVERSE_LIKE_DIRECT 492 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 493 * @stable ICU 3.8 494 */ 495 public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e; 496 497 /** Paragraph level setting<p> 498 * 499 * Constant indicating that the base direction depends on the first strong 500 * directional character in the text according to the Unicode Bidirectional 501 * Algorithm. If no strong directional character is present, 502 * then set the paragraph level to 1 (right-to-left).<p> 503 * 504 * If this value is used in conjunction with reordering modes 505 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 506 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 507 * is assumed to be visual LTR, and the text after reordering is required 508 * to be the corresponding logical string with appropriate contextual 509 * direction. 
The direction of the result string will be RTL if either 510 * the righmost or leftmost strong character of the source text is RTL 511 * or Arabic Letter, or if the text contains no strong character; 512 * the direction will be LTR otherwise.<p> 513 * 514 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 515 * be added at the beginning of the result string to ensure round trip 516 * (that the result string, when reordered back to visual, will produce 517 * the original source text). 518 * @see #REORDER_INVERSE_LIKE_DIRECT 519 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 520 * @stable ICU 3.8 521 */ 522 public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f; 523 524 /** 525 * Maximum explicit embedding level. 526 * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>). 527 * @stable ICU 3.8 528 */ 529 public static final byte MAX_EXPLICIT_LEVEL = 61; 530 531 /** 532 * Bit flag for level input. 533 * Overrides directional properties. 534 * @stable ICU 3.8 535 */ 536 public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80; 537 538 /** 539 * Special value which can be returned by the mapping methods when a 540 * logical index has no corresponding visual index or vice-versa. This may 541 * happen for the logical-to-visual mapping of a Bidi control when option 542 * <code>OPTION_REMOVE_CONTROLS</code> is 543 * specified. This can also happen for the visual-to-logical mapping of a 544 * Bidi mark (LRM or RLM) inserted by option 545 * <code>OPTION_INSERT_MARKS</code>. 546 * @see #getVisualIndex 547 * @see #getVisualMap 548 * @see #getLogicalIndex 549 * @see #getLogicalMap 550 * @see #OPTION_INSERT_MARKS 551 * @see #OPTION_REMOVE_CONTROLS 552 * @stable ICU 3.8 553 */ 554 public static final int MAP_NOWHERE = -1; 555 556 /** 557 * Mixed-directional text. 
558 * @stable ICU 3.8 559 */ 560 public static final byte MIXED = 2; 561 562 /** 563 * option bit for writeReordered(): 564 * replace characters with the "mirrored" property in RTL runs 565 * by their mirror-image mappings 566 * 567 * @see #writeReordered 568 * @stable ICU 3.8 569 */ 570 public static final short DO_MIRRORING = 2; 571 572 /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode. 573 * @see #setReorderingMode 574 * @stable ICU 3.8 575 */ 576 private static final short REORDER_DEFAULT = 0; 577 578 /** Reordering mode: Logical to Visual algorithm which handles numbers in 579 * a way which mimicks the behavior of Windows XP. 580 * @see #setReorderingMode 581 * @stable ICU 3.8 582 */ 583 private static final short REORDER_NUMBERS_SPECIAL = 1; 584 585 /** Reordering mode: Logical to Visual algorithm grouping numbers with 586 * adjacent R characters (reversible algorithm). 587 * @see #setReorderingMode 588 * @stable ICU 3.8 589 */ 590 private static final short REORDER_GROUP_NUMBERS_WITH_R = 2; 591 592 /** Reordering mode: Reorder runs only to transform a Logical LTR string 593 * to the logical RTL string with the same display, or vice-versa.<br> 594 * If this mode is set together with option 595 * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source 596 * text may be removed and other controls may be added to produce the 597 * minimum combination which has the required display. 598 * @see #OPTION_INSERT_MARKS 599 * @see #setReorderingMode 600 * @stable ICU 3.8 601 */ 602 private static final short REORDER_RUNS_ONLY = 3; 603 604 /** Reordering mode: Visual to Logical algorithm which handles numbers 605 * like L (same algorithm as selected by <code>setInverse(true)</code>. 
606 * @see #setInverse 607 * @see #setReorderingMode 608 * @stable ICU 3.8 609 */ 610 private static final short REORDER_INVERSE_NUMBERS_AS_L = 4; 611 612 /** Reordering mode: Visual to Logical algorithm equivalent to the regular 613 * Logical to Visual algorithm. 614 * @see #setReorderingMode 615 * @stable ICU 3.8 616 */ 617 private static final short REORDER_INVERSE_LIKE_DIRECT = 5; 618 619 /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the 620 * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm. 621 * @see #setReorderingMode 622 * @stable ICU 3.8 623 */ 624 private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6; 625 626 /* Reordering mode values must be ordered so that all the regular logical to 627 * visual modes come first, and all inverse Bidi modes come last. 628 */ 629 private static final short REORDER_LAST_LOGICAL_TO_VISUAL = 630 REORDER_NUMBERS_SPECIAL; 631 632 /** 633 * Option bit for <code>setReorderingOptions</code>: 634 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 635 * a reordering to a Logical order 636 * 637 * <p>This option must be set or reset before calling 638 * <code>setPara</code>.</p> 639 * 640 * <p>This option is significant only with reordering modes which generate 641 * a result with Logical order, specifically.</p> 642 * <ul> 643 * <li><code>REORDER_RUNS_ONLY</code></li> 644 * <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li> 645 * <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li> 646 * <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 647 * </ul> 648 * 649 * <p>If this option is set in conjunction with reordering mode 650 * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 651 * <code>setInverse(true)</code>, it implies option 652 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 653 * <code>writeReordered()</code>.</p> 654 * 655 * <p>For other reordering modes, a minimum number of LRM or RLM characters 656 * will be added to the source text after 
reordering it so as to ensure 657 * round trip, i.e. when applying the inverse reordering mode on the 658 * resulting logical text with removal of Bidi marks 659 * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling 660 * <code>setPara()</code> or option 661 * <code>REMOVE_BIDI_CONTROLS</code> in 662 * <code>writeReordered</code>), the result will be identical to the 663 * source text in the first transformation. 664 * 665 * <p>This option will be ignored if specified together with option 666 * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option 667 * <code>REMOVE_BIDI_CONTROLS</code> in calls to method 668 * <code>writeReordered()</code> and it implies option 669 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 670 * <code>writeReordered()</code> if the reordering mode is 671 * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 672 * 673 * @see #setReorderingMode 674 * @see #setReorderingOptions 675 * @see #INSERT_LRM_FOR_NUMERIC 676 * @see #REMOVE_BIDI_CONTROLS 677 * @see #OPTION_REMOVE_CONTROLS 678 * @see #REORDER_RUNS_ONLY 679 * @see #REORDER_INVERSE_NUMBERS_AS_L 680 * @see #REORDER_INVERSE_LIKE_DIRECT 681 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 682 * @stable ICU 3.8 683 */ 684 private static final int OPTION_INSERT_MARKS = 1; 685 686 /** 687 * Option bit for <code>setReorderingOptions</code>: 688 * remove Bidi control characters 689 * 690 * <p>This option must be set or reset before calling 691 * <code>setPara</code>.</p> 692 * 693 * <p>This option nullifies option 694 * <code>OPTION_INSERT_MARKS</code>. 
It inhibits option 695 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 696 * <code>writeReordered()</code> and it implies option 697 * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p> 698 * 699 * @see #setReorderingMode 700 * @see #setReorderingOptions 701 * @see #OPTION_INSERT_MARKS 702 * @see #INSERT_LRM_FOR_NUMERIC 703 * @see #REMOVE_BIDI_CONTROLS 704 * @stable ICU 3.8 705 */ 706 private static final int OPTION_REMOVE_CONTROLS = 2; 707 708 /** 709 * Option bit for <code>setReorderingOptions</code>: 710 * process the output as part of a stream to be continued 711 * 712 * <p>This option must be set or reset before calling 713 * <code>setPara</code>.</p> 714 * 715 * <p>This option specifies that the caller is interested in processing 716 * large text object in parts. The results of the successive calls are 717 * expected to be concatenated by the caller. Only the call for the last 718 * part will have this option bit off.</p> 719 * 720 * <p>When this option bit is on, <code>setPara()</code> may process 721 * less than the full source text in order to truncate the text at a 722 * meaningful boundary. The caller should call 723 * <code>getProcessedLength()</code> immediately after calling 724 * <code>setPara()</code> in order to determine how much of the source 725 * text has been processed. Source text beyond that length should be 726 * resubmitted in following calls to <code>setPara</code>. 
The 727 * processed length may be less than the length of the source text if a 728 * character preceding the last character of the source text constitutes a 729 * reasonable boundary (like a block separator) for text to be continued.<br> 730 * If the last character of the source text constitutes a reasonable 731 * boundary, the whole text will be processed at once.<br> 732 * If nowhere in the source text there exists 733 * such a reasonable boundary, the processed length will be zero.<br> 734 * The caller should check for such an occurrence and do one of the following: 735 * <ul><li>submit a larger amount of text with a better chance to include 736 * a reasonable boundary.</li> 737 * <li>resubmit the same text after turning off option 738 * <code>OPTION_STREAMING</code>.</li></ul> 739 * In all cases, this option should be turned off before processing the last 740 * part of the text.</p> 741 * 742 * <p>When the <code>OPTION_STREAMING</code> option is used, it is 743 * recommended to call <code>orderParagraphsLTR()</code> with argument 744 * <code>orderParagraphsLTR</code> set to <code>true</code> before calling 745 * <code>setPara()</code> so that later paragraphs may be concatenated to 746 * previous paragraphs on the right. 747 * </p> 748 * 749 * @see #setReorderingMode 750 * @see #setReorderingOptions 751 * @see #getProcessedLength 752 * @see #orderParagraphsLTR 753 * @stable ICU 3.8 754 */ 755 private static final int OPTION_STREAMING = 4; 756 757 /* 758 * Comparing the description of the Bidi algorithm with this implementation 759 * is easier with the same names for the Bidi types in the code as there. 
760 * See UCharacterDirection 761 */ 762 private static final byte L = 0; 763 private static final byte R = 1; 764 private static final byte EN = 2; 765 private static final byte ES = 3; 766 private static final byte ET = 4; 767 private static final byte AN = 5; 768 private static final byte CS = 6; 769 static final byte B = 7; 770 private static final byte S = 8; 771 private static final byte WS = 9; 772 private static final byte ON = 10; 773 private static final byte LRE = 11; 774 private static final byte LRO = 12; 775 private static final byte AL = 13; 776 private static final byte RLE = 14; 777 private static final byte RLO = 15; 778 private static final byte PDF = 16; 779 private static final byte NSM = 17; 780 private static final byte BN = 18; 781 782 private static final int MASK_R_AL = (1 << R | 1 << AL); 783 784 private static final char CR = '\r'; 785 private static final char LF = '\n'; 786 787 static final int LRM_BEFORE = 1; 788 static final int LRM_AFTER = 2; 789 static final int RLM_BEFORE = 4; 790 static final int RLM_AFTER = 8; 791 792 /* 793 * reference to parent paragraph object (reference to self if this object is 794 * a paragraph object); set to null in a newly opened object; set to a 795 * real value after a successful execution of setPara or setLine 796 */ 797 BidiBase paraBidi; 798 799 final UBiDiProps bdp; 800 801 /* character array representing the current text */ 802 char[] text; 803 804 /* length of the current text */ 805 int originalLength; 806 807 /* if the option OPTION_STREAMING is set, this is the length of 808 * text actually processed by <code>setPara</code>, which may be shorter 809 * than the original length. Otherwise, it is identical to the original 810 * length. 811 */ 812 public int length; 813 814 /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi 815 * marks are allowed to be inserted in one of the reordering modes, the 816 * length of the result string may be different from the processed length. 
817 */ 818 int resultLength; 819 820 /* indicators for whether memory may be allocated after construction */ 821 boolean mayAllocateText; 822 boolean mayAllocateRuns; 823 824 /* arrays with one value per text-character */ 825 byte[] dirPropsMemory = new byte[1]; 826 byte[] levelsMemory = new byte[1]; 827 byte[] dirProps; 828 byte[] levels; 829 830 /* must block separators receive level 0? */ 831 boolean orderParagraphsLTR; 832 833 /* the paragraph level */ 834 byte paraLevel; 835 836 /* original paraLevel when contextual */ 837 /* must be one of DEFAULT_xxx or 0 if not contextual */ 838 byte defaultParaLevel; 839 840 /* the following is set in setPara, used in processPropertySeq */ 841 842 ImpTabPair impTabPair; /* reference to levels state table pair */ 843 844 /* the overall paragraph or line directionality*/ 845 byte direction; 846 847 /* flags is a bit set for which directional properties are in the text */ 848 int flags; 849 850 /* lastArabicPos is index to the last AL in the text, -1 if none */ 851 int lastArabicPos; 852 853 /* characters after trailingWSStart are WS and are */ 854 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ 855 int trailingWSStart; 856 857 /* fields for paragraph handling */ 858 int paraCount; /* set in getDirProps() */ 859 int[] parasMemory = new int[1]; 860 int[] paras; /* limits of paragraphs, filled in 861 ResolveExplicitLevels() or CheckExplicitLevels() */ 862 863 /* for single paragraph text, we only need a tiny array of paras (no allocation) */ 864 int[] simpleParas = {0}; 865 866 /* fields for line reordering */ 867 int runCount; /* ==-1: runs not set up yet */ 868 BidiRun[] runsMemory = new BidiRun[0]; 869 BidiRun[] runs; 870 871 /* for non-mixed text, we only need a tiny array of runs (no allocation) */ 872 BidiRun[] simpleRuns = {new BidiRun()}; 873 874 /* mapping of runs in logical order to visual order */ 875 int[] logicalToVisualRunsMap; 876 877 /* flag to indicate that the map has been updated 
*/ 878 boolean isGoodLogicalToVisualRunsMap; 879 880 /* for inverse Bidi with insertion of directional marks */ 881 InsertPoints insertPoints = new InsertPoints(); 882 883 /* for option OPTION_REMOVE_CONTROLS */ 884 int controlCount; 885 886 /* 887 * Sometimes, bit values are more appropriate 888 * to deal with directionality properties. 889 * Abbreviations in these method names refer to names 890 * used in the Bidi algorithm. 891 */ 892 static int DirPropFlag(byte dir) { 893 return (1 << dir); 894 } 895 896 /* 897 * The following bit is ORed to the property of characters in paragraphs 898 * with contextual RTL direction when paraLevel is contextual. 899 */ 900 static final byte CONTEXT_RTL_SHIFT = 6; 901 static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT); // 0x40 902 static byte NoContextRTL(byte dir) 903 { 904 return (byte)(dir & ~CONTEXT_RTL); 905 } 906 907 /* 908 * The following is a variant of DirProp.DirPropFlag() which ignores the 909 * CONTEXT_RTL bit. 910 */ 911 static int DirPropFlagNC(byte dir) { 912 return (1<<(dir & ~CONTEXT_RTL)); 913 } 914 915 static final int DirPropFlagMultiRuns = DirPropFlag((byte)31); 916 917 /* to avoid some conditional statements, use tiny constant arrays */ 918 static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) }; 919 static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) }; 920 static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) }; 921 922 static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; } 923 static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; } 924 static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; } 925 926 /* 927 * are there any characters that are LTR? 928 */ 929 static final int MASK_LTR = 930 DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO); 931 932 /* 933 * are there any characters that are RTL? 
934 */ 935 static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO); 936 937 /* explicit embedding codes */ 938 private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO); 939 private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO); 940 private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF); 941 private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT; 942 943 /* paragraph and segment separators */ 944 private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S); 945 946 /* all types that are counted as White Space or Neutral in some steps */ 947 static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT; 948 private static final int MASK_N = DirPropFlag(ON)|MASK_WS; 949 950 /* types that are neutrals or could becomes neutrals in (Wn) */ 951 private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N; 952 953 /* 954 * These types may be changed to "e", 955 * the embedding type (L or R) of the run, 956 * in the Bidi algorithm (N2) 957 */ 958 static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N; 959 960 /* 961 * the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java 962 */ 963 private static byte GetLRFromLevel(byte level) 964 { 965 return (byte)(level & 1); 966 } 967 968 private static boolean IsDefaultLevel(byte level) 969 { 970 return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR); 971 } 972 973 byte GetParaLevelAt(int index) 974 { 975 return (defaultParaLevel != 0) ? 
976 (byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel; 977 } 978 979 static boolean IsBidiControlChar(int c) 980 { 981 /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or 982 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 983 return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))); 984 } 985 986 public void verifyValidPara() 987 { 988 if (this != this.paraBidi) { 989 throw new IllegalStateException(""); 990 } 991 } 992 993 public void verifyValidParaOrLine() 994 { 995 BidiBase para = this.paraBidi; 996 /* verify Para */ 997 if (this == para) { 998 return; 999 } 1000 /* verify Line */ 1001 if ((para == null) || (para != para.paraBidi)) { 1002 throw new IllegalStateException(); 1003 } 1004 } 1005 1006 public void verifyRange(int index, int start, int limit) 1007 { 1008 if (index < start || index >= limit) { 1009 throw new IllegalArgumentException("Value " + index + 1010 " is out of range " + start + " to " + limit); 1011 } 1012 } 1013 1014 public void verifyIndex(int index, int start, int limit) 1015 { 1016 if (index < start || index >= limit) { 1017 throw new ArrayIndexOutOfBoundsException("Index " + index + 1018 " is out of range " + start + " to " + limit); 1019 } 1020 } 1021 1022 /** 1023 * Allocate a <code>Bidi</code> object with preallocated memory 1024 * for internal structures. 1025 * This method provides a <code>Bidi</code> object like the default constructor 1026 * but it also preallocates memory for internal structures 1027 * according to the sizings supplied by the caller.<p> 1028 * The preallocation can be limited to some of the internal memory 1029 * by setting some values to 0 here. 
That means that if, e.g., 1030 * <code>maxRunCount</code> cannot be reasonably predetermined and should not 1031 * be set to <code>maxLength</code> (the only failproof value) to avoid 1032 * wasting memory, then <code>maxRunCount</code> could be set to 0 here 1033 * and the internal structures that are associated with it will be allocated 1034 * on demand, just like with the default constructor. 1035 * 1036 * @param maxLength is the maximum text or line length that internal memory 1037 * will be preallocated for. An attempt to associate this object with a 1038 * longer text will fail, unless this value is 0, which leaves the allocation 1039 * up to the implementation. 1040 * 1041 * @param maxRunCount is the maximum anticipated number of same-level runs 1042 * that internal memory will be preallocated for. An attempt to access 1043 * visual runs on an object that was not preallocated for as many runs 1044 * as the text was actually resolved to will fail, 1045 * unless this value is 0, which leaves the allocation up to the implementation.<br><br> 1046 * The number of runs depends on the actual text and maybe anywhere between 1047 * 1 and <code>maxLength</code>. It is typically small. 1048 * 1049 * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0 1050 * @stable ICU 3.8 1051 */ 1052 public BidiBase(int maxLength, int maxRunCount) 1053 { 1054 /* check the argument values */ 1055 if (maxLength < 0 || maxRunCount < 0) { 1056 throw new IllegalArgumentException(); 1057 } 1058 1059 /* reset the object, all reference variables null, all flags false, 1060 all sizes 0. 1061 In fact, we don't need to do anything, since class members are 1062 initialized as zero when an instance is created. 
1063 */ 1064 /* 1065 mayAllocateText = false; 1066 mayAllocateRuns = false; 1067 orderParagraphsLTR = false; 1068 paraCount = 0; 1069 runCount = 0; 1070 trailingWSStart = 0; 1071 flags = 0; 1072 paraLevel = 0; 1073 defaultParaLevel = 0; 1074 direction = 0; 1075 */ 1076 /* get Bidi properties */ 1077 try { 1078 bdp = UBiDiProps.getSingleton(); 1079 } 1080 catch (IOException e) { 1081 throw new MissingResourceException(e.getMessage(), "(BidiProps)", ""); 1082 } 1083 1084 /* allocate memory for arrays as requested */ 1085 if (maxLength > 0) { 1086 getInitialDirPropsMemory(maxLength); 1087 getInitialLevelsMemory(maxLength); 1088 } else { 1089 mayAllocateText = true; 1090 } 1091 1092 if (maxRunCount > 0) { 1093 // if maxRunCount == 1, use simpleRuns[] 1094 if (maxRunCount > 1) { 1095 getInitialRunsMemory(maxRunCount); 1096 } 1097 } else { 1098 mayAllocateRuns = true; 1099 } 1100 } 1101 1102 /* 1103 * We are allowed to allocate memory if object==null or 1104 * mayAllocate==true for each array that we need. 1105 * 1106 * Assume sizeNeeded>0. 1107 * If object != null, then assume size > 0. 
1108 */ 1109 private Object getMemory(String label, Object array, Class arrayClass, 1110 boolean mayAllocate, int sizeNeeded) 1111 { 1112 int len = Array.getLength(array); 1113 1114 /* we have at least enough memory and must not allocate */ 1115 if (sizeNeeded == len) { 1116 return array; 1117 } 1118 if (!mayAllocate) { 1119 /* we must not allocate */ 1120 if (sizeNeeded <= len) { 1121 return array; 1122 } 1123 throw new OutOfMemoryError("Failed to allocate memory for " 1124 + label); 1125 } 1126 /* we may try to grow or shrink */ 1127 /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid 1128 the allocation altogether and rely on this.length */ 1129 try { 1130 return Array.newInstance(arrayClass, sizeNeeded); 1131 } catch (Exception e) { 1132 throw new OutOfMemoryError("Failed to allocate memory for " 1133 + label); 1134 } 1135 } 1136 1137 /* helper methods for each allocated array */ 1138 private void getDirPropsMemory(boolean mayAllocate, int len) 1139 { 1140 Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len); 1141 dirPropsMemory = (byte[]) array; 1142 } 1143 1144 void getDirPropsMemory(int len) 1145 { 1146 getDirPropsMemory(mayAllocateText, len); 1147 } 1148 1149 private void getLevelsMemory(boolean mayAllocate, int len) 1150 { 1151 Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len); 1152 levelsMemory = (byte[]) array; 1153 } 1154 1155 void getLevelsMemory(int len) 1156 { 1157 getLevelsMemory(mayAllocateText, len); 1158 } 1159 1160 private void getRunsMemory(boolean mayAllocate, int len) 1161 { 1162 Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len); 1163 runsMemory = (BidiRun[]) array; 1164 } 1165 1166 void getRunsMemory(int len) 1167 { 1168 getRunsMemory(mayAllocateRuns, len); 1169 } 1170 1171 /* additional methods used by constructor - always allow allocation */ 1172 private void getInitialDirPropsMemory(int len) 1173 { 1174 getDirPropsMemory(true, len); 1175 } 
1176 1177 private void getInitialLevelsMemory(int len) 1178 { 1179 getLevelsMemory(true, len); 1180 } 1181 1182 private void getInitialParasMemory(int len) 1183 { 1184 Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len); 1185 parasMemory = (int[]) array; 1186 } 1187 1188 private void getInitialRunsMemory(int len) 1189 { 1190 getRunsMemory(true, len); 1191 } 1192 1193 /* perform (P2)..(P3) ------------------------------------------------------- */ 1194 1195 private void getDirProps() 1196 { 1197 int i = 0, i0, i1; 1198 flags = 0; /* collect all directionalities in the text */ 1199 int uchar; 1200 byte dirProp; 1201 byte paraDirDefault = 0; /* initialize to avoid compiler warnings */ 1202 boolean isDefaultLevel = IsDefaultLevel(paraLevel); 1203 /* for inverse Bidi, the default para level is set to RTL if there is a 1204 strong R or AL character at either end of the text */ 1205 lastArabicPos = -1; 1206 controlCount = 0; 1207 1208 final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */ 1209 final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */ 1210 final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */ 1211 1212 int state; 1213 int paraStart = 0; /* index of first char in paragraph */ 1214 byte paraDir; /* == CONTEXT_RTL within paragraphs 1215 starting with strong R char */ 1216 byte lastStrongDir=0; /* for default level & inverse Bidi */ 1217 int lastStrongLTR=0; /* for STREAMING option */ 1218 1219 if (isDefaultLevel) { 1220 paraDirDefault = ((paraLevel & 1) != 0) ? 
CONTEXT_RTL : 0; 1221 paraDir = paraDirDefault; 1222 lastStrongDir = paraDirDefault; 1223 state = LOOKING_FOR_STRONG; 1224 } else { 1225 state = NOT_CONTEXTUAL; 1226 paraDir = 0; 1227 } 1228 /* count paragraphs and determine the paragraph level (P2..P3) */ 1229 /* 1230 * see comment on constant fields: 1231 * the LEVEL_DEFAULT_XXX values are designed so that 1232 * their low-order bit alone yields the intended default 1233 */ 1234 1235 for (i = 0; i < originalLength; /* i is incremented in the loop */) { 1236 i0 = i; /* index of first code unit */ 1237 uchar = UTF16.charAt(text, 0, originalLength, i); 1238 i += Character.charCount(uchar); 1239 i1 = i - 1; /* index of last code unit, gets the directional property */ 1240 1241 dirProp = (byte)bdp.getClass(uchar); 1242 1243 flags |= DirPropFlag(dirProp); 1244 dirProps[i1] = (byte)(dirProp | paraDir); 1245 if (i1 > i0) { /* set previous code units' properties to BN */ 1246 flags |= DirPropFlag(BN); 1247 do { 1248 dirProps[--i1] = (byte)(BN | paraDir); 1249 } while (i1 > i0); 1250 } 1251 if (state == LOOKING_FOR_STRONG) { 1252 if (dirProp == L) { 1253 state = FOUND_STRONG_CHAR; 1254 if (paraDir != 0) { 1255 paraDir = 0; 1256 for (i1 = paraStart; i1 < i; i1++) { 1257 dirProps[i1] &= ~CONTEXT_RTL; 1258 } 1259 } 1260 continue; 1261 } 1262 if (dirProp == R || dirProp == AL) { 1263 state = FOUND_STRONG_CHAR; 1264 if (paraDir == 0) { 1265 paraDir = CONTEXT_RTL; 1266 for (i1 = paraStart; i1 < i; i1++) { 1267 dirProps[i1] |= CONTEXT_RTL; 1268 } 1269 } 1270 continue; 1271 } 1272 } 1273 if (dirProp == L) { 1274 lastStrongDir = 0; 1275 lastStrongLTR = i; /* i is index to next character */ 1276 } 1277 else if (dirProp == R) { 1278 lastStrongDir = CONTEXT_RTL; 1279 } 1280 else if (dirProp == AL) { 1281 lastStrongDir = CONTEXT_RTL; 1282 lastArabicPos = i-1; 1283 } 1284 else if (dirProp == B) { 1285 if (i < originalLength) { /* B not last char in text */ 1286 if (!((uchar == (int)CR) && (text[i] == (int)LF))) { 1287 paraCount++; 1288 
} 1289 if (isDefaultLevel) { 1290 state=LOOKING_FOR_STRONG; 1291 paraStart = i; /* i is index to next character */ 1292 paraDir = paraDirDefault; 1293 lastStrongDir = paraDirDefault; 1294 } 1295 } 1296 } 1297 } 1298 if (isDefaultLevel) { 1299 paraLevel = GetParaLevelAt(0); 1300 } 1301 1302 /* The following line does nothing new for contextual paraLevel, but is 1303 needed for absolute paraLevel. */ 1304 flags |= DirPropFlagLR(paraLevel); 1305 1306 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1307 flags |= DirPropFlag(L); 1308 } 1309 } 1310 1311 /* perform (X1)..(X9) ------------------------------------------------------- */ 1312 1313 /* determine if the text is mixed-directional or single-directional */ 1314 private byte directionFromFlags() { 1315 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 1316 if (!((flags & MASK_RTL) != 0 || 1317 ((flags & DirPropFlag(AN)) != 0 && 1318 (flags & MASK_POSSIBLE_N) != 0))) { 1319 return Bidi.DIRECTION_LEFT_TO_RIGHT; 1320 } else if ((flags & MASK_LTR) == 0) { 1321 return Bidi.DIRECTION_RIGHT_TO_LEFT; 1322 } else { 1323 return MIXED; 1324 } 1325 } 1326 1327 /* 1328 * Resolve the explicit levels as specified by explicit embedding codes. 1329 * Recalculate the flags to have them reflect the real properties 1330 * after taking the explicit embeddings into account. 1331 * 1332 * The Bidi algorithm is designed to result in the same behavior whether embedding 1333 * levels are externally specified (from "styled text", supposedly the preferred 1334 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. 1335 * That is why (X9) instructs to remove all explicit codes (and BN). 1336 * However, in a real implementation, this removal of these codes and their index 1337 * positions in the plain text is undesirable since it would result in 1338 * reallocated, reindexed text. 
* Instead, this implementation leaves the codes in there and just ignores them
     * in the subsequent processing.
     * In order to get the same reordering behavior, positions with a BN or an
     * explicit embedding code just get the same level assigned as the last "real"
     * character.
     *
     * Some implementations, not this one, then overwrite some of these
     * directionality properties at "real" same-level-run boundaries by
     * L or R codes so that the resolution of weak types can be performed on the
     * entire paragraph at once instead of having to parse it once more and
     * perform that resolution on same-level-runs.
     * This limits the scope of the implicit rules in effectively
     * the same way as the run limits.
     *
     * Instead, this implementation does not modify these codes.
     * On one hand, the paragraph has to be scanned for same-level-runs, but
     * on the other hand, this saves another loop to reset these codes,
     * or saves making and modifying a copy of dirProps[].
     *
     *
     * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
     *
     *
     * Handling the stack of explicit levels (Xn):
     *
     * With the Bidi stack of explicit levels,
     * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
     * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61.
     *
     * In order to have correct push-pop semantics even in the case of overflows,
     * there are two overflow counters:
     * - countOver60 is incremented with each LRx at level 60
     * - from level 60, one RLx increases the level to 61
     * - countOver61 is incremented with each LRx and RLx at level 61
     *
     * Popping levels with PDF must work in the opposite order so that level 61
     * is correct at the correct point. Underflows (too many PDFs) must be checked.
     *
     * This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
1378 */ 1379 private byte resolveExplicitLevels() { 1380 int i = 0; 1381 byte dirProp; 1382 byte level = GetParaLevelAt(0); 1383 1384 byte dirct; 1385 int paraIndex = 0; 1386 1387 /* determine if the text is mixed-directional or single-directional */ 1388 dirct = directionFromFlags(); 1389 1390 /* we may not need to resolve any explicit levels, but for multiple 1391 paragraphs we want to loop on all chars to set the para boundaries */ 1392 if ((dirct != MIXED) && (paraCount == 1)) { 1393 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 1394 } else if ((paraCount == 1) && 1395 ((flags & MASK_EXPLICIT) == 0)) { 1396 /* mixed, but all characters are at the same embedding level */ 1397 /* or we are in "inverse Bidi" */ 1398 /* and we don't have contextual multiple paragraphs with some B char */ 1399 /* set all levels to the paragraph level */ 1400 for (i = 0; i < length; ++i) { 1401 levels[i] = level; 1402 } 1403 } else { 1404 /* continue to perform (Xn) */ 1405 1406 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 1407 /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */ 1408 byte embeddingLevel = level; 1409 byte newLevel; 1410 byte stackTop = 0; 1411 1412 byte[] stack = new byte[MAX_EXPLICIT_LEVEL]; /* we never push anything >=MAX_EXPLICIT_LEVEL */ 1413 int countOver60 = 0; 1414 int countOver61 = 0; /* count overflows of explicit levels */ 1415 1416 /* recalculate the flags */ 1417 flags = 0; 1418 1419 for (i = 0; i < length; ++i) { 1420 dirProp = NoContextRTL(dirProps[i]); 1421 switch(dirProp) { 1422 case LRE: 1423 case LRO: 1424 /* (X3, X5) */ 1425 newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */ 1426 if (newLevel <= MAX_EXPLICIT_LEVEL) { 1427 stack[stackTop] = embeddingLevel; 1428 ++stackTop; 1429 embeddingLevel = newLevel; 1430 if (dirProp == LRO) { 1431 embeddingLevel |= INTERNAL_LEVEL_OVERRIDE; 1432 
} 1433 /* we don't need to set LEVEL_OVERRIDE off for LRE 1434 since this has already been done for newLevel which is 1435 the source for embeddingLevel. 1436 */ 1437 } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) { 1438 ++countOver61; 1439 } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ { 1440 ++countOver60; 1441 } 1442 flags |= DirPropFlag(BN); 1443 break; 1444 case RLE: 1445 case RLO: 1446 /* (X2, X4) */ 1447 newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */ 1448 if (newLevel<=MAX_EXPLICIT_LEVEL) { 1449 stack[stackTop] = embeddingLevel; 1450 ++stackTop; 1451 embeddingLevel = newLevel; 1452 if (dirProp == RLO) { 1453 embeddingLevel |= INTERNAL_LEVEL_OVERRIDE; 1454 } 1455 /* we don't need to set LEVEL_OVERRIDE off for RLE 1456 since this has already been done for newLevel which is 1457 the source for embeddingLevel. 1458 */ 1459 } else { 1460 ++countOver61; 1461 } 1462 flags |= DirPropFlag(BN); 1463 break; 1464 case PDF: 1465 /* (X7) */ 1466 /* handle all the overflow cases first */ 1467 if (countOver61 > 0) { 1468 --countOver61; 1469 } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) { 1470 /* handle LRx overflows from level 60 */ 1471 --countOver60; 1472 } else if (stackTop > 0) { 1473 /* this is the pop operation; it also pops level 61 while countOver60>0 */ 1474 --stackTop; 1475 embeddingLevel = stack[stackTop]; 1476 /* } else { (underflow) */ 1477 } 1478 flags |= DirPropFlag(BN); 1479 break; 1480 case B: 1481 stackTop = 0; 1482 countOver60 = 0; 1483 countOver61 = 0; 1484 level = GetParaLevelAt(i); 1485 if ((i + 1) < length) { 1486 embeddingLevel = GetParaLevelAt(i+1); 1487 if (!((text[i] == CR) && (text[i + 1] == LF))) { 1488 paras[paraIndex++] = i+1; 1489 } 1490 } 1491 flags |= DirPropFlag(B); 1492 break; 1493 case BN: 1494 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 1495 /* they 
will get their levels set correctly in adjustWSLevels() */ 1496 flags |= DirPropFlag(BN); 1497 break; 1498 default: 1499 /* all other types get the "real" level */ 1500 if (level != embeddingLevel) { 1501 level = embeddingLevel; 1502 if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { 1503 flags |= DirPropFlagO(level) | DirPropFlagMultiRuns; 1504 } else { 1505 flags |= DirPropFlagE(level) | DirPropFlagMultiRuns; 1506 } 1507 } 1508 if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { 1509 flags |= DirPropFlag(dirProp); 1510 } 1511 break; 1512 } 1513 1514 /* 1515 * We need to set reasonable levels even on BN codes and 1516 * explicit codes because we will later look at same-level runs (X10). 1517 */ 1518 levels[i] = level; 1519 } 1520 if ((flags & MASK_EMBEDDING) != 0) { 1521 flags |= DirPropFlagLR(paraLevel); 1522 } 1523 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1524 flags |= DirPropFlag(L); 1525 } 1526 1527 /* subsequently, ignore the explicit codes and BN (X9) */ 1528 1529 /* again, determine if the text is mixed-directional or single-directional */ 1530 dirct = directionFromFlags(); 1531 } 1532 1533 return dirct; 1534 } 1535 1536 /* 1537 * Use a pre-specified embedding levels array: 1538 * 1539 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 1540 * ignore all explicit codes (X9), 1541 * and check all the preset levels. 1542 * 1543 * Recalculate the flags to have them reflect the real properties 1544 * after taking the explicit embeddings into account. 
1545 */ 1546 private byte checkExplicitLevels() { 1547 byte dirProp; 1548 int i; 1549 this.flags = 0; /* collect all directionalities in the text */ 1550 byte level; 1551 int paraIndex = 0; 1552 1553 for (i = 0; i < length; ++i) { 1554 if (levels[i] == 0) { 1555 levels[i] = paraLevel; 1556 } 1557 if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) { 1558 if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) { 1559 levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE); 1560 } else { 1561 levels[i] = paraLevel; 1562 } 1563 } 1564 level = levels[i]; 1565 dirProp = NoContextRTL(dirProps[i]); 1566 if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { 1567 /* keep the override flag in levels[i] but adjust the flags */ 1568 level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */ 1569 flags |= DirPropFlagO(level); 1570 } else { 1571 /* set the flags */ 1572 flags |= DirPropFlagE(level) | DirPropFlag(dirProp); 1573 } 1574 1575 if ((level < GetParaLevelAt(i) && 1576 !((0 == level) && (dirProp == B))) || 1577 (MAX_EXPLICIT_LEVEL <level)) { 1578 /* level out of bounds */ 1579 throw new IllegalArgumentException("level " + level + 1580 " out of bounds at index " + i); 1581 } 1582 if ((dirProp == B) && ((i + 1) < length)) { 1583 if (!((text[i] == CR) && (text[i + 1] == LF))) { 1584 paras[paraIndex++] = i + 1; 1585 } 1586 } 1587 } 1588 if ((flags&MASK_EMBEDDING) != 0) { 1589 flags |= DirPropFlagLR(paraLevel); 1590 } 1591 1592 /* determine if the text is mixed-directional or single-directional */ 1593 return directionFromFlags(); 1594 } 1595 1596 /*********************************************************************/ 1597 /* The Properties state machine table */ 1598 /*********************************************************************/ 1599 /* */ 1600 /* All table cells are 8 bits: */ 1601 /* bits 0..4: next state */ 1602 /* bits 5..7: action to perform (if > 0) */ 1603 /* */ 1604 /* Cells may be of format "n" where n represents the next state */ 1605 /* (except for the rightmost 
column). */ 1606 /* Cells may also be of format "_(x,y)" where x represents an action */ 1607 /* to perform and y represents the next state. */ 1608 /* */ 1609 /*********************************************************************/ 1610 /* Definitions and type for properties state tables */ 1611 /*********************************************************************/ 1612 private static final int IMPTABPROPS_COLUMNS = 14; 1613 private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1; 1614 private static short GetStateProps(short cell) { 1615 return (short)(cell & 0x1f); 1616 } 1617 private static short GetActionProps(short cell) { 1618 return (short)(cell >> 5); 1619 } 1620 1621 private static final short groupProp[] = /* dirProp regrouped */ 1622 { 1623 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ 1624 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 1625 }; 1626 private static final short _L = 0; 1627 private static final short _R = 1; 1628 private static final short _EN = 2; 1629 private static final short _AN = 3; 1630 private static final short _ON = 4; 1631 private static final short _S = 5; 1632 private static final short _B = 6; /* reduced dirProp */ 1633 1634 /*********************************************************************/ 1635 /* */ 1636 /* PROPERTIES STATE TABLE */ 1637 /* */ 1638 /* In table impTabProps, */ 1639 /* - the ON column regroups ON and WS */ 1640 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */ 1641 /* - the Res column is the reduced property assigned to a run */ 1642 /* */ 1643 /* Action 1: process current run1, init new run1 */ 1644 /* 2: init new run2 */ 1645 /* 3: process run1, process run2, init new run1 */ 1646 /* 4: process run1, set run1=run2, init new run2 */ 1647 /* */ 1648 /* Notes: */ 1649 /* 1) This table is used in resolveImplicitLevels(). */ 1650 /* 2) This table triggers actions when there is a change in the Bidi*/ 1651 /* property of incoming characters (action 1). 
*/ 1652 /* 3) Most such property sequences are processed immediately (in */ 1653 /* fact, passed to processPropertySeq(). */ 1654 /* 4) However, numbers are assembled as one sequence. This means */ 1655 /* that undefined situations (like CS following digits, until */ 1656 /* it is known if the next char will be a digit) are held until */ 1657 /* following chars define them. */ 1658 /* Example: digits followed by CS, then comes another CS or ON; */ 1659 /* the digits will be processed, then the CS assigned */ 1660 /* as the start of an ON sequence (action 3). */ 1661 /* 5) There are cases where more than one sequence must be */ 1662 /* processed, for instance digits followed by CS followed by L: */ 1663 /* the digits must be processed as one sequence, and the CS */ 1664 /* must be processed as an ON sequence, all this before starting */ 1665 /* assembling chars for the opening L sequence. */ 1666 /* */ 1667 /* */ 1668 private static final short impTabProps[][] = 1669 { 1670 /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, Res */ 1671 /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, _ON }, 1672 /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, _L }, 1673 /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, _R }, 1674 /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, _R }, 1675 /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, _EN }, 1676 /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, _AN }, 1677 /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, _AN }, 1678 /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, _ON }, 1679 /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, _ON }, 1680 /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, _ON }, 1681 /*10 EN+ES/CS */ { 96+1, 96+2, 4, 
96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, _EN }, 1682 /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, _EN }, 1683 /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, _AN }, 1684 /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, _AN }, 1685 /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3, _ON }, 1686 /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, _S }, 1687 /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, _S }, 1688 /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, _B } 1689 }; 1690 1691 /*********************************************************************/ 1692 /* The levels state machine tables */ 1693 /*********************************************************************/ 1694 /* */ 1695 /* All table cells are 8 bits: */ 1696 /* bits 0..3: next state */ 1697 /* bits 4..7: action to perform (if > 0) */ 1698 /* */ 1699 /* Cells may be of format "n" where n represents the next state */ 1700 /* (except for the rightmost column). */ 1701 /* Cells may also be of format "_(x,y)" where x represents an action */ 1702 /* to perform and y represents the next state. 
*/ 1703 /* */ 1704 /* This format limits each table to 16 states each and to 15 actions.*/ 1705 /* */ 1706 /*********************************************************************/ 1707 /* Definitions and type for levels state tables */ 1708 /*********************************************************************/ 1709 private static final int IMPTABLEVELS_COLUMNS = _B + 2; 1710 private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1; 1711 private static short GetState(byte cell) { return (short)(cell & 0x0f); } 1712 private static short GetAction(byte cell) { return (short)(cell >> 4); } 1713 1714 private static class ImpTabPair { 1715 byte[][][] imptab; 1716 short[][] impact; 1717 1718 ImpTabPair(byte[][] table1, byte[][] table2, 1719 short[] act1, short[] act2) { 1720 imptab = new byte[][][] {table1, table2}; 1721 impact = new short[][] {act1, act2}; 1722 } 1723 } 1724 1725 /*********************************************************************/ 1726 /* */ 1727 /* LEVELS STATE TABLES */ 1728 /* */ 1729 /* In all levels state tables, */ 1730 /* - state 0 is the initial state */ 1731 /* - the Res column is the increment to add to the text level */ 1732 /* for this property sequence. */ 1733 /* */ 1734 /* The impact arrays for each table of a pair map the local action */ 1735 /* numbers of the table to the total list of actions. For instance, */ 1736 /* action 2 in a given table corresponds to the action number which */ 1737 /* appears in entry [2] of the impact array for that table. */ 1738 /* The first entry of all impact arrays must be 0. */ 1739 /* */ 1740 /* Action 1: init conditional sequence */ 1741 /* 2: prepend conditional sequence to current sequence */ 1742 /* 3: set ON sequence to new level - 1 */ 1743 /* 4: init EN/AN/ON sequence */ 1744 /* 5: fix EN/AN/ON sequence followed by R */ 1745 /* 6: set previous level sequence to level 2 */ 1746 /* */ 1747 /* Notes: */ 1748 /* 1) These tables are used in processPropertySeq(). 
The input */ 1749 /* is property sequences as determined by resolveImplicitLevels. */ 1750 /* 2) Most such property sequences are processed immediately */ 1751 /* (levels are assigned). */ 1752 /* 3) However, some sequences cannot be assigned a final level till */ 1753 /* one or more following sequences are received. For instance, */ 1754 /* ON following an R sequence within an even-level paragraph. */ 1755 /* If the following sequence is R, the ON sequence will be */ 1756 /* assigned basic run level+1, and so will the R sequence. */ 1757 /* 4) S is generally handled like ON, since its level will be fixed */ 1758 /* to paragraph level in adjustWSLevels(). */ 1759 /* */ 1760 1761 private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */ 1762 /* In this table, conditional sequences receive the higher possible level 1763 until proven otherwise. 1764 */ 1765 { 1766 /* L, R, EN, AN, ON, S, B, Res */ 1767 /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 }, 1768 /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 }, 1769 /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 }, 1770 /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 }, 1771 /* 4 : R+ON */ { 0x20, 1, 3, 3, 4, 4, 0x20, 1 }, 1772 /* 5 : AN+ON */ { 0x20, 1, 0x20, 2, 5, 5, 0x20, 1 } 1773 }; 1774 1775 private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */ 1776 /* In this table, conditional sequences receive the lower possible level 1777 until proven otherwise. 
1778 */ 1779 { 1780 /* L, R, EN, AN, ON, S, B, Res */ 1781 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 1782 /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 }, 1783 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 1784 /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 }, 1785 /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 }, 1786 /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 } 1787 }; 1788 1789 private static final short[] impAct0 = {0,1,2,3,4,5,6}; 1790 1791 private static final ImpTabPair impTab_DEFAULT = new ImpTabPair( 1792 impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0); 1793 1794 private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */ 1795 /* In this table, conditional sequences receive the higher possible 1796 level until proven otherwise. 1797 */ 1798 /* L, R, EN, AN, ON, S, B, Res */ 1799 /* 0 : init */ { 0, 2, 1, 1, 0, 0, 0, 0 }, 1800 /* 1 : L+EN/AN */ { 0, 2, 1, 1, 0, 0, 0, 2 }, 1801 /* 2 : R */ { 0, 2, 4, 4, 0x13, 0, 0, 1 }, 1802 /* 3 : R+ON */ { 0x20, 2, 4, 4, 3, 3, 0x20, 1 }, 1803 /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 } 1804 }; 1805 private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair( 1806 impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0); 1807 1808 private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = { 1809 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1810 until proven that there is L or sor/eor on both sides. AN is handled like EN. 
1811 */ 1812 /* L, R, EN, AN, ON, S, B, Res */ 1813 /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 }, 1814 /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 }, 1815 /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 }, 1816 /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 }, 1817 /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 }, 1818 /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 } 1819 }; 1820 private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = { 1821 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1822 until proven that there is L on both sides. AN is handled like EN. 1823 */ 1824 /* L, R, EN, AN, ON, S, B, Res */ 1825 /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 }, 1826 /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 }, 1827 /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 }, 1828 /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 }, 1829 /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 } 1830 }; 1831 private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new 1832 ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R, 1833 impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0); 1834 1835 private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = { 1836 /* This table is identical to the Default LTR table except that EN and AN 1837 are handled like L. 1838 */ 1839 /* L, R, EN, AN, ON, S, B, Res */ 1840 /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 }, 1841 /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 }, 1842 /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 }, 1843 /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 }, 1844 /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 }, 1845 /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 } 1846 }; 1847 private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = { 1848 /* This table is identical to the Default RTL table except that EN and AN 1849 are handled like L. 
1850 */ 1851 /* L, R, EN, AN, ON, S, B, Res */ 1852 /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 }, 1853 /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 }, 1854 /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 }, 1855 /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 }, 1856 /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 }, 1857 /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 } 1858 }; 1859 private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair 1860 (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L, 1861 impAct0, impAct0); 1862 1863 private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */ 1864 /* In this table, conditional sequences receive the lower possible level 1865 until proven otherwise. 1866 */ 1867 /* L, R, EN, AN, ON, S, B, Res */ 1868 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 1869 /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 }, 1870 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 1871 /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 }, 1872 /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 }, 1873 /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 }, 1874 /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 } 1875 }; 1876 private static final short[] impAct1 = {0,1,11,12}; 1877 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair( 1878 impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 1879 1880 private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { 1881 /* The case handled in this table is (visually): R EN L 1882 */ 1883 /* L, R, EN, AN, ON, S, B, Res */ 1884 /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 }, 1885 /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 }, 1886 /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 }, 1887 /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 }, 1888 /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 }, 1889 /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 }, 1890 /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 
0x30, 4 } 1891 }; 1892 private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { 1893 /* The cases handled in this table are (visually): R EN L 1894 R L AN L 1895 */ 1896 /* L, R, EN, AN, ON, S, B, Res */ 1897 /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 }, 1898 /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 }, 1899 /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 }, 1900 /* 3 : L */ { 3 , 0, 3, 0x36, 0x14, 0x40, 0, 1 }, 1901 /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 }, 1902 /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 }, 1903 /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 } 1904 }; 1905 private static final short impAct2[] = {0,1,7,8,9,10}; 1906 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = 1907 new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1908 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); 1909 1910 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair( 1911 impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 1912 1913 private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = { 1914 /* The case handled in this table is (visually): R EN L 1915 */ 1916 /* L, R, EN, AN, ON, S, B, Res */ 1917 /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 }, 1918 /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 }, 1919 /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 }, 1920 /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 }, 1921 /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 } 1922 }; 1923 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new 1924 ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1925 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); 1926 1927 private class LevState { 1928 byte[][] impTab; /* level table pointer */ 1929 short[] impAct; /* action map array */ 1930 int startON; /* start of ON sequence */ 1931 int startL2EN; /* start of 
level 2 sequence */ 1932 int lastStrongRTL; /* index of last found R or AL */ 1933 short state; /* current state */ 1934 byte runLevel; /* run level before implicit solving */ 1935 } 1936 1937 /*------------------------------------------------------------------------*/ 1938 1939 static final int FIRSTALLOC = 10; 1940 /* 1941 * param pos: position where to insert 1942 * param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1943 */ 1944 private void addPoint(int pos, int flag) 1945 { 1946 Point point = new Point(); 1947 1948 int len = insertPoints.points.length; 1949 if (len == 0) { 1950 insertPoints.points = new Point[FIRSTALLOC]; 1951 len = FIRSTALLOC; 1952 } 1953 if (insertPoints.size >= len) { /* no room for new point */ 1954 Point[] savePoints = insertPoints.points; 1955 insertPoints.points = new Point[len * 2]; 1956 System.arraycopy(savePoints, 0, insertPoints.points, 0, len); 1957 } 1958 point.pos = pos; 1959 point.flag = flag; 1960 insertPoints.points[insertPoints.size] = point; 1961 insertPoints.size++; 1962 } 1963 1964 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1965 1966 /* 1967 * This implementation of the (Wn) rules applies all rules in one pass. 1968 * In order to do so, it needs a look-ahead of typically 1 character 1969 * (except for W5: sequences of ET) and keeps track of changes 1970 * in a rule Wp that affect a later Wq (p<q). 1971 * 1972 * The (Nn) and (In) rules are also performed in that same single loop, 1973 * but effectively one iteration behind for white space. 1974 * 1975 * Since all implicit rules are performed in one step, it is not necessary 1976 * to actually store the intermediate directional properties in dirProps[]. 
1977 */ 1978 1979 private void processPropertySeq(LevState levState, short _prop, 1980 int start, int limit) { 1981 byte cell; 1982 byte[][] impTab = levState.impTab; 1983 short[] impAct = levState.impAct; 1984 short oldStateSeq,actionSeq; 1985 byte level, addLevel; 1986 int start0, k; 1987 1988 start0 = start; /* save original start position */ 1989 oldStateSeq = levState.state; 1990 cell = impTab[oldStateSeq][_prop]; 1991 levState.state = GetState(cell); /* isolate the new state */ 1992 actionSeq = impAct[GetAction(cell)]; /* isolate the action */ 1993 addLevel = (byte)impTab[levState.state][IMPTABLEVELS_RES]; 1994 1995 if (actionSeq != 0) { 1996 switch (actionSeq) { 1997 case 1: /* init ON seq */ 1998 levState.startON = start0; 1999 break; 2000 2001 case 2: /* prepend ON seq to current seq */ 2002 start = levState.startON; 2003 break; 2004 2005 case 3: /* L or S after possible relevant EN/AN */ 2006 /* check if we had EN after R/AL */ 2007 if (levState.startL2EN >= 0) { 2008 addPoint(levState.startL2EN, LRM_BEFORE); 2009 } 2010 levState.startL2EN = -1; /* not within previous if since could also be -2 */ 2011 /* check if we had any relevant EN/AN after R/AL */ 2012 if ((insertPoints.points.length == 0) || 2013 (insertPoints.size <= insertPoints.confirmed)) { 2014 /* nothing, just clean up */ 2015 levState.lastStrongRTL = -1; 2016 /* check if we have a pending conditional segment */ 2017 level = (byte)impTab[oldStateSeq][IMPTABLEVELS_RES]; 2018 if ((level & 1) != 0 && levState.startON > 0) { /* after ON */ 2019 start = levState.startON; /* reset to basic run level */ 2020 } 2021 if (_prop == _S) { /* add LRM before S */ 2022 addPoint(start0, LRM_BEFORE); 2023 insertPoints.confirmed = insertPoints.size; 2024 } 2025 break; 2026 } 2027 /* reset previous RTL cont to level for LTR text */ 2028 for (k = levState.lastStrongRTL + 1; k < start0; k++) { 2029 /* reset odd level, leave runLevel+2 as is */ 2030 levels[k] = (byte)((levels[k] - 2) & ~1); 2031 } 2032 /* mark 
insert points as confirmed */ 2033 insertPoints.confirmed = insertPoints.size; 2034 levState.lastStrongRTL = -1; 2035 if (_prop == _S) { /* add LRM before S */ 2036 addPoint(start0, LRM_BEFORE); 2037 insertPoints.confirmed = insertPoints.size; 2038 } 2039 break; 2040 2041 case 4: /* R/AL after possible relevant EN/AN */ 2042 /* just clean up */ 2043 if (insertPoints.points.length > 0) 2044 /* remove all non confirmed insert points */ 2045 insertPoints.size = insertPoints.confirmed; 2046 levState.startON = -1; 2047 levState.startL2EN = -1; 2048 levState.lastStrongRTL = limit - 1; 2049 break; 2050 2051 case 5: /* EN/AN after R/AL + possible cont */ 2052 /* check for real AN */ 2053 if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) { 2054 /* real AN */ 2055 if (levState.startL2EN == -1) { /* if no relevant EN already found */ 2056 /* just note the righmost digit as a strong RTL */ 2057 levState.lastStrongRTL = limit - 1; 2058 break; 2059 } 2060 if (levState.startL2EN >= 0) { /* after EN, no AN */ 2061 addPoint(levState.startL2EN, LRM_BEFORE); 2062 levState.startL2EN = -2; 2063 } 2064 /* note AN */ 2065 addPoint(start0, LRM_BEFORE); 2066 break; 2067 } 2068 /* if first EN/AN after R/AL */ 2069 if (levState.startL2EN == -1) { 2070 levState.startL2EN = start0; 2071 } 2072 break; 2073 2074 case 6: /* note location of latest R/AL */ 2075 levState.lastStrongRTL = limit - 1; 2076 levState.startON = -1; 2077 break; 2078 2079 case 7: /* L after R+ON/EN/AN */ 2080 /* include possible adjacent number on the left */ 2081 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) { 2082 } 2083 if (k >= 0) { 2084 addPoint(k, RLM_BEFORE); /* add RLM before */ 2085 insertPoints.confirmed = insertPoints.size; /* confirm it */ 2086 } 2087 levState.startON = start0; 2088 break; 2089 2090 case 8: /* AN after L */ 2091 /* AN numbers between L text on both sides may be trouble. 
*/ 2092 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 2093 addPoint(start0, LRM_BEFORE); /* add LRM before */ 2094 addPoint(start0, LRM_AFTER); /* add LRM after */ 2095 break; 2096 2097 case 9: /* R after L+ON/EN/AN */ 2098 /* false alert, infirm LRMs around previous AN */ 2099 insertPoints.size=insertPoints.confirmed; 2100 if (_prop == _S) { /* add RLM before S */ 2101 addPoint(start0, RLM_BEFORE); 2102 insertPoints.confirmed = insertPoints.size; 2103 } 2104 break; 2105 2106 case 10: /* L after L+ON/AN */ 2107 level = (byte)(levState.runLevel + addLevel); 2108 for (k=levState.startON; k < start0; k++) { 2109 if (levels[k] < level) { 2110 levels[k] = level; 2111 } 2112 } 2113 insertPoints.confirmed = insertPoints.size; /* confirm inserts */ 2114 levState.startON = start0; 2115 break; 2116 2117 case 11: /* L after L+ON+EN/AN/ON */ 2118 level = (byte)levState.runLevel; 2119 for (k = start0-1; k >= levState.startON; k--) { 2120 if (levels[k] == level+3) { 2121 while (levels[k] == level+3) { 2122 levels[k--] -= 2; 2123 } 2124 while (levels[k] == level) { 2125 k--; 2126 } 2127 } 2128 if (levels[k] == level+2) { 2129 levels[k] = level; 2130 continue; 2131 } 2132 levels[k] = (byte)(level+1); 2133 } 2134 break; 2135 2136 case 12: /* R after L+ON+EN/AN/ON */ 2137 level = (byte)(levState.runLevel+1); 2138 for (k = start0-1; k >= levState.startON; k--) { 2139 if (levels[k] > level) { 2140 levels[k] -= 2; 2141 } 2142 } 2143 break; 2144 2145 default: /* we should never get here */ 2146 throw new IllegalStateException("Internal ICU error in processPropertySeq"); 2147 } 2148 } 2149 if ((addLevel) != 0 || (start < start0)) { 2150 level = (byte)(levState.runLevel + addLevel); 2151 for (k = start; k < limit; k++) { 2152 levels[k] = level; 2153 } 2154 } 2155 } 2156 2157 private void resolveImplicitLevels(int start, int limit, short sor, short eor) 2158 { 2159 LevState levState = new LevState(); 2160 int i, start1, start2; 2161 short oldStateImp, stateImp, 
actionImp; 2162 short gprop, resProp, cell; 2163 short nextStrongProp = R; 2164 int nextStrongPos = -1; 2165 2166 2167 /* check for RTL inverse Bidi mode */ 2168 /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to 2169 * loop on the text characters from end to start. 2170 * This would need a different properties state table (at least different 2171 * actions) and different levels state tables (maybe very similar to the 2172 * LTR corresponding ones. 2173 */ 2174 /* initialize for levels state table */ 2175 levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2176 levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2177 levState.state = 0; 2178 levState.runLevel = levels[start]; 2179 levState.impTab = impTabPair.imptab[levState.runLevel & 1]; 2180 levState.impAct = impTabPair.impact[levState.runLevel & 1]; 2181 processPropertySeq(levState, (short)sor, start, start); 2182 /* initialize for property state table */ 2183 if (dirProps[start] == NSM) { 2184 stateImp = (short)(1 + sor); 2185 } else { 2186 stateImp = 0; 2187 } 2188 start1 = start; 2189 start2 = 0; 2190 2191 for (i = start; i <= limit; i++) { 2192 if (i >= limit) { 2193 gprop = eor; 2194 } else { 2195 short prop, prop1; 2196 prop = NoContextRTL(dirProps[i]); 2197 gprop = groupProp[prop]; 2198 } 2199 oldStateImp = stateImp; 2200 cell = impTabProps[oldStateImp][gprop]; 2201 stateImp = GetStateProps(cell); /* isolate the new state */ 2202 actionImp = GetActionProps(cell); /* isolate the action */ 2203 if ((i == limit) && (actionImp == 0)) { 2204 /* there is an unprocessed sequence if its property == eor */ 2205 actionImp = 1; /* process the last sequence */ 2206 } 2207 if (actionImp != 0) { 2208 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES]; 2209 switch (actionImp) { 2210 case 1: /* process current seq1, init new seq1 */ 2211 processPropertySeq(levState, resProp, start1, i); 2212 start1 = i; 2213 break; 2214 case 2: /* init new seq2 */ 2215 
start2 = i; 2216 break; 2217 case 3: /* process seq1, process seq2, init new seq1 */ 2218 processPropertySeq(levState, resProp, start1, start2); 2219 processPropertySeq(levState, _ON, start2, i); 2220 start1 = i; 2221 break; 2222 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 2223 processPropertySeq(levState, resProp, start1, start2); 2224 start1 = start2; 2225 start2 = i; 2226 break; 2227 default: /* we should never get here */ 2228 throw new IllegalStateException("Internal ICU error in resolveImplicitLevels"); 2229 } 2230 } 2231 } 2232 /* flush possible pending sequence, e.g. ON */ 2233 processPropertySeq(levState, (short)eor, limit, limit); 2234 } 2235 2236 /* perform (L1) and (X9) ---------------------------------------------------- */ 2237 2238 /* 2239 * Reset the embedding levels for some non-graphic characters (L1). 2240 * This method also sets appropriate levels for BN, and 2241 * explicit embedding types that are supposed to have been removed 2242 * from the paragraph in (X9). 
2243 */ 2244 private void adjustWSLevels() { 2245 int i; 2246 2247 if ((flags & MASK_WS) != 0) { 2248 int flag; 2249 i = trailingWSStart; 2250 while (i > 0) { 2251 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 2252 while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) { 2253 if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 2254 levels[i] = 0; 2255 } else { 2256 levels[i] = GetParaLevelAt(i); 2257 } 2258 } 2259 2260 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 2261 /* here, i+1 is guaranteed to be <length */ 2262 while (i > 0) { 2263 flag = DirPropFlagNC(dirProps[--i]); 2264 if ((flag & MASK_BN_EXPLICIT) != 0) { 2265 levels[i] = levels[i + 1]; 2266 } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 2267 levels[i] = 0; 2268 break; 2269 } else if ((flag & MASK_B_S) != 0){ 2270 levels[i] = GetParaLevelAt(i); 2271 break; 2272 } 2273 } 2274 } 2275 } 2276 } 2277 2278 private int Bidi_Min(int x, int y) { 2279 return x < y ? x : y; 2280 } 2281 2282 private int Bidi_Abs(int x) { 2283 return x >= 0 ? x : -x; 2284 } 2285 2286 /** 2287 * Perform the Unicode Bidi algorithm. It is defined in the 2288 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2289 * version 13, 2290 * also described in The Unicode Standard, Version 4.0 .<p> 2291 * 2292 * This method takes a piece of plain text containing one or more paragraphs, 2293 * with or without externally specified embedding levels from <i>styled</i> 2294 * text and computes the left-right-directionality of each character.<p> 2295 * 2296 * If the entire text is all of the same directionality, then 2297 * the method may not perform all the steps described by the algorithm, 2298 * i.e., some levels may not be the same as if all steps were performed. 
2299 * This is not relevant for unidirectional text.<br> 2300 * For example, in pure LTR text with numbers the numbers would get 2301 * a resolved level of 2 higher than the surrounding text according to 2302 * the algorithm. This implementation may set all resolved levels to 2303 * the same value in such a case.<p> 2304 * 2305 * The text can be composed of multiple paragraphs. Occurrence of a block 2306 * separator in the text terminates a paragraph, and whatever comes next starts 2307 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 2308 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 2309 * in that case, the pair of characters is considered as terminating the 2310 * preceding paragraph, and a new paragraph will be started by a character 2311 * coming after the LF. 2312 * 2313 * Although the text is passed here as a <code>String</code>, it is 2314 * stored internally as an array of characters. Therefore the 2315 * documentation will refer to indexes of the characters in the text. 2316 * 2317 * @param text contains the text that the Bidi algorithm will be performed 2318 * on. This text can be retrieved with <code>getText()</code> or 2319 * <code>getTextAsString</code>.<br> 2320 * 2321 * @param paraLevel specifies the default level for the text; 2322 * it is typically 0 (LTR) or 1 (RTL). 2323 * If the method shall determine the paragraph level from the text, 2324 * then <code>paraLevel</code> can be set to 2325 * either <code>LEVEL_DEFAULT_LTR</code> 2326 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 2327 * paragraphs, the paragraph level shall be determined separately for 2328 * each paragraph; if a paragraph does not include any strongly typed 2329 * character, then the desired default is used (0 for LTR or 1 for RTL). 2330 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 2331 * is also valid, with odd levels indicating RTL. 
2332 * 2333 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 2334 * ignoring characters like LRE and PDF in the text. 2335 * A level overrides the directional property of its corresponding 2336 * (same index) character if the level has the 2337 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 2338 * Except for that bit, it must be 2339 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 2340 * with one exception: a level of zero may be specified for a 2341 * paragraph separator even if <code>paraLevel>0</code> when multiple 2342 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 2343 * <strong>Caution: </strong>A reference to this array, not a copy 2344 * of the levels, will be stored in the <code>Bidi</code> object; 2345 * the <code>embeddingLevels</code> 2346 * should not be modified to avoid unexpected results on subsequent 2347 * Bidi operations. However, the <code>setPara()</code> and 2348 * <code>setLine()</code> methods may modify some or all of the 2349 * levels.<br><br> 2350 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 2351 * have one entry for each character in <code>text</code>. 2352 * 2353 * @throws IllegalArgumentException if the values in embeddingLevels are 2354 * not within the allowed range 2355 * 2356 * @see #LEVEL_DEFAULT_LTR 2357 * @see #LEVEL_DEFAULT_RTL 2358 * @see #LEVEL_OVERRIDE 2359 * @see #MAX_EXPLICIT_LEVEL 2360 * @stable ICU 3.8 2361 */ 2362 void setPara(String text, byte paraLevel, byte[] embeddingLevels) 2363 { 2364 if (text == null) { 2365 setPara(new char[0], paraLevel, embeddingLevels); 2366 } else { 2367 setPara(text.toCharArray(), paraLevel, embeddingLevels); 2368 } 2369 } 2370 2371 /** 2372 * Perform the Unicode Bidi algorithm. 
It is defined in the 2373 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2374 * version 13, 2375 * also described in The Unicode Standard, Version 4.0 .<p> 2376 * 2377 * This method takes a piece of plain text containing one or more paragraphs, 2378 * with or without externally specified embedding levels from <i>styled</i> 2379 * text and computes the left-right-directionality of each character.<p> 2380 * 2381 * If the entire text is all of the same directionality, then 2382 * the method may not perform all the steps described by the algorithm, 2383 * i.e., some levels may not be the same as if all steps were performed. 2384 * This is not relevant for unidirectional text.<br> 2385 * For example, in pure LTR text with numbers the numbers would get 2386 * a resolved level of 2 higher than the surrounding text according to 2387 * the algorithm. This implementation may set all resolved levels to 2388 * the same value in such a case.<p> 2389 * 2390 * The text can be composed of multiple paragraphs. Occurrence of a block 2391 * separator in the text terminates a paragraph, and whatever comes next starts 2392 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 2393 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 2394 * in that case, the pair of characters is considered as terminating the 2395 * preceding paragraph, and a new paragraph will be started by a character 2396 * coming after the LF. 2397 * 2398 * The text is stored internally as an array of characters. Therefore the 2399 * documentation will refer to indexes of the characters in the text. 2400 * 2401 * @param chars contains the text that the Bidi algorithm will be performed 2402 * on. This text can be retrieved with <code>getText()</code> or 2403 * <code>getTextAsString</code>.<br> 2404 * 2405 * @param paraLevel specifies the default level for the text; 2406 * it is typically 0 (LTR) or 1 (RTL). 
2407 * If the method shall determine the paragraph level from the text, 2408 * then <code>paraLevel</code> can be set to 2409 * either <code>LEVEL_DEFAULT_LTR</code> 2410 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 2411 * paragraphs, the paragraph level shall be determined separately for 2412 * each paragraph; if a paragraph does not include any strongly typed 2413 * character, then the desired default is used (0 for LTR or 1 for RTL). 2414 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 2415 * is also valid, with odd levels indicating RTL. 2416 * 2417 * @param embeddingLevels (in) may be used to preset the embedding and 2418 * override levels, ignoring characters like LRE and PDF in the text. 2419 * A level overrides the directional property of its corresponding 2420 * (same index) character if the level has the 2421 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 2422 * Except for that bit, it must be 2423 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 2424 * with one exception: a level of zero may be specified for a 2425 * paragraph separator even if <code>paraLevel>0</code> when multiple 2426 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 2427 * <strong>Caution: </strong>A reference to this array, not a copy 2428 * of the levels, will be stored in the <code>Bidi</code> object; 2429 * the <code>embeddingLevels</code> 2430 * should not be modified to avoid unexpected results on subsequent 2431 * Bidi operations. However, the <code>setPara()</code> and 2432 * <code>setLine()</code> methods may modify some or all of the 2433 * levels.<br><br> 2434 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 2435 * have one entry for each character in <code>text</code>. 
2436 * 2437 * @throws IllegalArgumentException if the values in embeddingLevels are 2438 * not within the allowed range 2439 * 2440 * @see #LEVEL_DEFAULT_LTR 2441 * @see #LEVEL_DEFAULT_RTL 2442 * @see #LEVEL_OVERRIDE 2443 * @see #MAX_EXPLICIT_LEVEL 2444 * @stable ICU 3.8 2445 */ 2446 public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels) 2447 { 2448 /* check the argument values */ 2449 if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) { 2450 verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1); 2451 } 2452 if (chars == null) { 2453 chars = new char[0]; 2454 } 2455 2456 /* initialize the Bidi object */ 2457 this.paraBidi = null; /* mark unfinished setPara */ 2458 this.text = chars; 2459 this.length = this.originalLength = this.resultLength = text.length; 2460 this.paraLevel = paraLevel; 2461 this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT; 2462 this.paraCount = 1; 2463 2464 /* Allocate zero-length arrays instead of setting to null here; then 2465 * checks for null in various places can be eliminated. 2466 */ 2467 dirProps = new byte[0]; 2468 levels = new byte[0]; 2469 runs = new BidiRun[0]; 2470 isGoodLogicalToVisualRunsMap = false; 2471 insertPoints.size = 0; /* clean up from last call */ 2472 insertPoints.confirmed = 0; /* clean up from last call */ 2473 2474 /* 2475 * Save the original paraLevel if contextual; otherwise, set to 0. 2476 */ 2477 if (IsDefaultLevel(paraLevel)) { 2478 defaultParaLevel = paraLevel; 2479 } else { 2480 defaultParaLevel = 0; 2481 } 2482 2483 if (length == 0) { 2484 /* 2485 * For an empty paragraph, create a Bidi object with the paraLevel and 2486 * the flags and the direction set but without allocating zero-length arrays. 2487 * There is nothing more to do. 
2488 */ 2489 if (IsDefaultLevel(paraLevel)) { 2490 this.paraLevel &= 1; 2491 defaultParaLevel = 0; 2492 } 2493 if ((this.paraLevel & 1) != 0) { 2494 flags = DirPropFlag(R); 2495 direction = Bidi.DIRECTION_RIGHT_TO_LEFT; 2496 } else { 2497 flags = DirPropFlag(L); 2498 direction = Bidi.DIRECTION_LEFT_TO_RIGHT; 2499 } 2500 2501 runCount = 0; 2502 paraCount = 0; 2503 paraBidi = this; /* mark successful setPara */ 2504 return; 2505 } 2506 2507 runCount = -1; 2508 2509 /* 2510 * Get the directional properties, 2511 * the flags bit-set, and 2512 * determine the paragraph level if necessary. 2513 */ 2514 getDirPropsMemory(length); 2515 dirProps = dirPropsMemory; 2516 getDirProps(); 2517 2518 /* the processed length may have changed if OPTION_STREAMING is set */ 2519 trailingWSStart = length; /* the levels[] will reflect the WS run */ 2520 2521 /* allocate paras memory */ 2522 if (paraCount > 1) { 2523 getInitialParasMemory(paraCount); 2524 paras = parasMemory; 2525 paras[paraCount - 1] = length; 2526 } else { 2527 /* initialize paras for single paragraph */ 2528 paras = simpleParas; 2529 simpleParas[0] = length; 2530 } 2531 2532 /* are explicit levels specified? */ 2533 if (embeddingLevels == null) { 2534 /* no: determine explicit levels according to the (Xn) rules */ 2535 getLevelsMemory(length); 2536 levels = levelsMemory; 2537 direction = resolveExplicitLevels(); 2538 } else { 2539 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */ 2540 levels = embeddingLevels; 2541 direction = checkExplicitLevels(); 2542 } 2543 2544 /* 2545 * The steps after (X9) in the Bidi algorithm are performed only if 2546 * the paragraph text has mixed directionality! 
2547 */ 2548 switch (direction) { 2549 case Bidi.DIRECTION_LEFT_TO_RIGHT: 2550 /* make sure paraLevel is even */ 2551 paraLevel = (byte)((paraLevel + 1) & ~1); 2552 2553 /* all levels are implicitly at paraLevel (important for getLevels()) */ 2554 trailingWSStart = 0; 2555 break; 2556 case Bidi.DIRECTION_RIGHT_TO_LEFT: 2557 /* make sure paraLevel is odd */ 2558 paraLevel |= 1; 2559 2560 /* all levels are implicitly at paraLevel (important for getLevels()) */ 2561 trailingWSStart = 0; 2562 break; 2563 default: 2564 this.impTabPair = impTab_DEFAULT; 2565 2566 /* 2567 * If there are no external levels specified and there 2568 * are no significant explicit level codes in the text, 2569 * then we can treat the entire paragraph as one run. 2570 * Otherwise, we need to perform the following rules on runs of 2571 * the text with the same embedding levels. (X10) 2572 * "Significant" explicit level codes are ones that actually 2573 * affect non-BN characters. 2574 * Examples for "insignificant" ones are empty embeddings 2575 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 
2576 */ 2577 if (embeddingLevels == null && paraCount <= 1 && 2578 (flags & DirPropFlagMultiRuns) == 0) { 2579 resolveImplicitLevels(0, length, 2580 GetLRFromLevel(GetParaLevelAt(0)), 2581 GetLRFromLevel(GetParaLevelAt(length - 1))); 2582 } else { 2583 /* sor, eor: start and end types of same-level-run */ 2584 int start, limit = 0; 2585 byte level, nextLevel; 2586 short sor, eor; 2587 2588 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 2589 level = GetParaLevelAt(0); 2590 nextLevel = levels[0]; 2591 if (level < nextLevel) { 2592 eor = GetLRFromLevel(nextLevel); 2593 } else { 2594 eor = GetLRFromLevel(level); 2595 } 2596 2597 do { 2598 /* determine start and limit of the run (end points just behind the run) */ 2599 2600 /* the values for this run's start are the same as for the previous run's end */ 2601 start = limit; 2602 level = nextLevel; 2603 if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) { 2604 /* except if this is a new paragraph, then set sor = para level */ 2605 sor = GetLRFromLevel(GetParaLevelAt(start)); 2606 } else { 2607 sor = eor; 2608 } 2609 2610 /* search for the limit of this run */ 2611 while (++limit < length && levels[limit] == level) {} 2612 2613 /* get the correct level of the next run */ 2614 if (limit < length) { 2615 nextLevel = levels[limit]; 2616 } else { 2617 nextLevel = GetParaLevelAt(length - 1); 2618 } 2619 2620 /* determine eor from max(level, nextLevel); sor is last run's eor */ 2621 if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) { 2622 eor = GetLRFromLevel(nextLevel); 2623 } else { 2624 eor = GetLRFromLevel(level); 2625 } 2626 2627 /* if the run consists of overridden directional types, then there 2628 are no implicit types to be resolved */ 2629 if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { 2630 resolveImplicitLevels(start, limit, sor, eor); 2631 } else { 2632 /* remove the LEVEL_OVERRIDE flags */ 2633 do { 2634 levels[start++] &= 
~INTERNAL_LEVEL_OVERRIDE; 2635 } while (start < limit); 2636 } 2637 } while (limit < length); 2638 } 2639 2640 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 2641 adjustWSLevels(); 2642 2643 break; 2644 } 2645 2646 resultLength += insertPoints.size; 2647 paraBidi = this; /* mark successful setPara */ 2648 } 2649 2650 /** 2651 * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the 2652 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2653 * version 13, 2654 * also described in The Unicode Standard, Version 4.0 .<p> 2655 * 2656 * This method takes a paragraph of text and computes the 2657 * left-right-directionality of each character. The text should not 2658 * contain any Unicode block separators.<p> 2659 * 2660 * The RUN_DIRECTION attribute in the text, if present, determines the base 2661 * direction (left-to-right or right-to-left). If not present, the base 2662 * direction is computed using the Unicode Bidirectional Algorithm, 2663 * defaulting to left-to-right if there are no strong directional characters 2664 * in the text. This attribute, if present, must be applied to all the text 2665 * in the paragraph.<p> 2666 * 2667 * The BIDI_EMBEDDING attribute in the text, if present, represents 2668 * embedding level information. Negative values from -1 to -62 indicate 2669 * overrides at the absolute value of the level. Positive values from 1 to 2670 * 62 indicate embeddings. Where values are zero or not defined, the base 2671 * embedding level as determined by the base direction is assumed.<p> 2672 * 2673 * The NUMERIC_SHAPING attribute in the text, if present, converts European 2674 * digits to other decimal digits before running the bidi algorithm. This 2675 * attribute, if present, must be applied to all the text in the paragraph. 
2676 * 2677 * If the entire text is all of the same directionality, then 2678 * the method may not perform all the steps described by the algorithm, 2679 * i.e., some levels may not be the same as if all steps were performed. 2680 * This is not relevant for unidirectional text.<br> 2681 * For example, in pure LTR text with numbers the numbers would get 2682 * a resolved level of 2 higher than the surrounding text according to 2683 * the algorithm. This implementation may set all resolved levels to 2684 * the same value in such a case.<p> 2685 * 2686 * @param paragraph a paragraph of text with optional character and 2687 * paragraph attribute information 2688 * @stable ICU 3.8 2689 */ 2690 public void setPara(AttributedCharacterIterator paragraph) 2691 { 2692 byte paraLvl; 2693 Boolean runDirection = 2694 (Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION); 2695 Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING); 2696 if (runDirection == null) { 2697 paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; 2698 } else { 2699 paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ? 
2700 (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT; 2701 } 2702 2703 byte[] lvls = null; 2704 int len = paragraph.getEndIndex() - paragraph.getBeginIndex(); 2705 byte[] embeddingLevels = new byte[len]; 2706 char[] txt = new char[len]; 2707 int i = 0; 2708 char ch = paragraph.first(); 2709 while (ch != AttributedCharacterIterator.DONE) { 2710 txt[i] = ch; 2711 Integer embedding = 2712 (Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING); 2713 if (embedding != null) { 2714 byte level = embedding.byteValue(); 2715 if (level == 0) { 2716 /* no-op */ 2717 } else if (level < 0) { 2718 lvls = embeddingLevels; 2719 embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE); 2720 } else { 2721 lvls = embeddingLevels; 2722 embeddingLevels[i] = level; 2723 } 2724 } 2725 ch = paragraph.next(); 2726 ++i; 2727 } 2728 2729 if (shaper != null) { 2730 NumericShapings.shape(shaper, txt, 0, len); 2731 } 2732 setPara(txt, paraLvl, lvls); 2733 } 2734 2735 /** 2736 * Specify whether block separators must be allocated level zero, 2737 * so that successive paragraphs will progress from left to right. 2738 * This method must be called before <code>setPara()</code>. 2739 * Paragraph separators (B) may appear in the text. Setting them to level zero 2740 * means that all paragraph separators (including one possibly appearing 2741 * in the last text position) are kept in the reordered text after the text 2742 * that they follow in the source text. 2743 * When this feature is not enabled, a paragraph separator at the last 2744 * position of the text before reordering will go to the first position 2745 * of the reordered text when the paragraph level is odd. 2746 * 2747 * @param ordarParaLTR specifies whether paragraph separators (B) must 2748 * receive level 0, so that successive paragraphs progress from left to right. 
2749 * 2750 * @see #setPara 2751 * @stable ICU 3.8 2752 */ 2753 private void orderParagraphsLTR(boolean ordarParaLTR) { 2754 orderParagraphsLTR = ordarParaLTR; 2755 } 2756 2757 /** 2758 * Get the directionality of the text. 2759 * 2760 * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code> 2761 * that indicates if the entire text 2762 * represented by this object is unidirectional, 2763 * and which direction, or if it is mixed-directional. 2764 * 2765 * @throws IllegalStateException if this call is not preceded by a successful 2766 * call to <code>setPara</code> or <code>setLine</code> 2767 * 2768 * @see #LTR 2769 * @see #RTL 2770 * @see #MIXED 2771 * @stable ICU 3.8 2772 */ 2773 private byte getDirection() 2774 { 2775 verifyValidParaOrLine(); 2776 return direction; 2777 } 2778 2779 /** 2780 * Get the length of the text. 2781 * 2782 * @return The length of the text that the <code>Bidi</code> object was 2783 * created for. 2784 * 2785 * @throws IllegalStateException if this call is not preceded by a successful 2786 * call to <code>setPara</code> or <code>setLine</code> 2787 * @stable ICU 3.8 2788 */ 2789 public int getLength() 2790 { 2791 verifyValidParaOrLine(); 2792 return originalLength; 2793 } 2794 2795 /* paragraphs API methods ------------------------------------------------- */ 2796 2797 /** 2798 * Get the paragraph level of the text. 2799 * 2800 * @return The paragraph level. If there are multiple paragraphs, their 2801 * level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or 2802 * LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph 2803 * is returned. 
2804 * 2805 * @throws IllegalStateException if this call is not preceded by a successful 2806 * call to <code>setPara</code> or <code>setLine</code> 2807 * 2808 * @see #LEVEL_DEFAULT_LTR 2809 * @see #LEVEL_DEFAULT_RTL 2810 * @see #getParagraph 2811 * @see #getParagraphByIndex 2812 * @stable ICU 3.8 2813 */ 2814 public byte getParaLevel() 2815 { 2816 verifyValidParaOrLine(); 2817 return paraLevel; 2818 } 2819 2820 /** 2821 * Get the index of a paragraph, given a position within the text.<p> 2822 * 2823 * @param charIndex is the index of a character within the text, in the 2824 * range <code>[0..getProcessedLength()-1]</code>. 2825 * 2826 * @return The index of the paragraph containing the specified position, 2827 * starting from 0. 2828 * 2829 * @throws IllegalStateException if this call is not preceded by a successful 2830 * call to <code>setPara</code> or <code>setLine</code> 2831 * @throws IllegalArgumentException if charIndex is not within the legal range 2832 * 2833 * @see com.ibm.icu.text.BidiRun 2834 * @see #getProcessedLength 2835 * @stable ICU 3.8 2836 */ 2837 public int getParagraphIndex(int charIndex) 2838 { 2839 verifyValidParaOrLine(); 2840 BidiBase bidi = paraBidi; /* get Para object if Line object */ 2841 verifyRange(charIndex, 0, bidi.length); 2842 int paraIndex; 2843 for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) { 2844 } 2845 return paraIndex; 2846 } 2847 2848 /** 2849 * <code>setLine()</code> returns a <code>Bidi</code> object to 2850 * contain the reordering information, especially the resolved levels, 2851 * for all the characters in a line of text. 
This line of text is 2852 * specified by referring to a <code>Bidi</code> object representing 2853 * this information for a piece of text containing one or more paragraphs, 2854 * and by specifying a range of indexes in this text.<p> 2855 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 2856 * 2857 * This is used after calling <code>setPara()</code> 2858 * for a piece of text, and after line-breaking on that text. 2859 * It is not necessary if each paragraph is treated as a single line.<p> 2860 * 2861 * After line-breaking, rules (L1) and (L2) for the treatment of 2862 * trailing WS and for reordering are performed on 2863 * a <code>Bidi</code> object that represents a line.<p> 2864 * 2865 * <strong>Important: </strong>the line <code>Bidi</code> object may 2866 * reference data within the global text <code>Bidi</code> object. 2867 * You should not alter the content of the global text object until 2868 * you are finished using the line object. 2869 * 2870 * @param start is the line's first index into the text. 2871 * 2872 * @param limit is just behind the line's last index into the text 2873 * (its last index +1). 2874 * 2875 * @return a <code>Bidi</code> object that will now represent a line of the text. 
2876 * 2877 * @throws IllegalStateException if this call is not preceded by a successful 2878 * call to <code>setPara</code> 2879 * @throws IllegalArgumentException if start and limit are not in the range 2880 * <code>0<=start<limit<=getProcessedLength()</code>, 2881 * or if the specified line crosses a paragraph boundary 2882 * 2883 * @see #setPara 2884 * @see #getProcessedLength 2885 * @stable ICU 3.8 2886 */ 2887 public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit) 2888 { 2889 verifyValidPara(); 2890 verifyRange(start, 0, limit); 2891 verifyRange(limit, 0, length+1); 2892 2893 return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit); 2894 } 2895 2896 /** 2897 * Get the level for one character. 2898 * 2899 * @param charIndex the index of a character. 2900 * 2901 * @return The level for the character at <code>charIndex</code>. 2902 * 2903 * @throws IllegalStateException if this call is not preceded by a successful 2904 * call to <code>setPara</code> or <code>setLine</code> 2905 * @throws IllegalArgumentException if charIndex is not in the range 2906 * <code>0<=charIndex<getProcessedLength()</code> 2907 * 2908 * @see #getProcessedLength 2909 * @stable ICU 3.8 2910 */ 2911 public byte getLevelAt(int charIndex) 2912 { 2913 if (charIndex < 0 || charIndex >= length) { 2914 return (byte)getBaseLevel(); 2915 } 2916 verifyValidParaOrLine(); 2917 verifyRange(charIndex, 0, length); 2918 return BidiLine.getLevelAt(this, charIndex); 2919 } 2920 2921 /** 2922 * Get an array of levels for each character.<p> 2923 * 2924 * Note that this method may allocate memory under some 2925 * circumstances, unlike <code>getLevelAt()</code>. 2926 * 2927 * @return The levels array for the text, 2928 * or <code>null</code> if an error occurs. 
2929 * 2930 * @throws IllegalStateException if this call is not preceded by a successful 2931 * call to <code>setPara</code> or <code>setLine</code> 2932 * @stable ICU 3.8 2933 */ 2934 private byte[] getLevels() 2935 { 2936 verifyValidParaOrLine(); 2937 if (length <= 0) { 2938 return new byte[0]; 2939 } 2940 return BidiLine.getLevels(this); 2941 } 2942 2943 /** 2944 * Get the number of runs. 2945 * This method may invoke the actual reordering on the 2946 * <code>Bidi</code> object, after <code>setPara()</code> 2947 * may have resolved only the levels of the text. Therefore, 2948 * <code>countRuns()</code> may have to allocate memory, 2949 * and may throw an exception if it fails to do so. 2950 * 2951 * @return The number of runs. 2952 * 2953 * @throws IllegalStateException if this call is not preceded by a successful 2954 * call to <code>setPara</code> or <code>setLine</code> 2955 * @stable ICU 3.8 2956 */ 2957 public int countRuns() 2958 { 2959 verifyValidParaOrLine(); 2960 BidiLine.getRuns(this); 2961 return runCount; 2962 } 2963 2964 /** 2965 * Get a visual-to-logical index map (array) for the characters in the 2966 * <code>Bidi</code> (paragraph or line) object. 2967 * <p> 2968 * Some values in the map may be <code>MAP_NOWHERE</code> if the 2969 * corresponding text characters are Bidi marks inserted in the visual 2970 * output by the option <code>OPTION_INSERT_MARKS</code>. 2971 * <p> 2972 * When the visual output is altered by using options of 2973 * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>, 2974 * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>, 2975 * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 2976 * be correct. It is advised to use, when possible, reordering options 2977 * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}. 
2978 * 2979 * @return an array of <code>getResultLength()</code> 2980 * indexes which will reflect the reordering of the characters.<br><br> 2981 * The index map will result in 2982 * <code>indexMap[visualIndex]==logicalIndex</code>, where 2983 * <code>indexMap</code> represents the returned array. 2984 * 2985 * @throws IllegalStateException if this call is not preceded by a successful 2986 * call to <code>setPara</code> or <code>setLine</code> 2987 * 2988 * @see #getLogicalMap 2989 * @see #getLogicalIndex 2990 * @see #getResultLength 2991 * @see #MAP_NOWHERE 2992 * @see #OPTION_INSERT_MARKS 2993 * @see #writeReordered 2994 * @stable ICU 3.8 2995 */ 2996 private int[] getVisualMap() 2997 { 2998 /* countRuns() checks successful call to setPara/setLine */ 2999 countRuns(); 3000 if (resultLength <= 0) { 3001 return new int[0]; 3002 } 3003 return BidiLine.getVisualMap(this); 3004 } 3005 3006 /** 3007 * This is a convenience method that does not use a <code>Bidi</code> object. 3008 * It is intended to be used for when an application has determined the levels 3009 * of objects (character sequences) and just needs to have them reordered (L2). 3010 * This is equivalent to using <code>getVisualMap()</code> on a 3011 * <code>Bidi</code> object. 3012 * 3013 * @param levels is an array of levels that have been determined by 3014 * the application. 3015 * 3016 * @return an array of <code>levels.length</code> 3017 * indexes which will reflect the reordering of the characters.<p> 3018 * The index map will result in 3019 * <code>indexMap[visualIndex]==logicalIndex</code>, where 3020 * <code>indexMap</code> represents the returned array. 3021 * 3022 * @stable ICU 3.8 3023 */ 3024 private static int[] reorderVisual(byte[] levels) 3025 { 3026 return BidiLine.reorderVisual(levels); 3027 } 3028 3029 /** 3030 * Constant indicating that the base direction depends on the first strong 3031 * directional character in the text according to the Unicode Bidirectional 3032 * Algorithm. 
* If no strong directional character is present, the base
     * direction is left-to-right.
     * The value is 0x7e.
     * @stable ICU 3.8
     */
    private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is right-to-left.
     * The value is 0x7f.
     * NOTE(review): the identifier is spelled "INTERMAL" (presumably a typo for
     * "INTERNAL"); confirm there are no other references before renaming.
     * @stable ICU 3.8
     */
    private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;

    /**
     * Create Bidi from the given text, embedding, and direction information.
     * The embeddings array may be null. If present, the values represent
     * embedding level information. Negative values from -1 to -61 indicate
     * overrides at the absolute value of the level. Positive values from 1 to
     * 61 indicate embeddings. Where values are zero, the base embedding level
     * as determined by the base direction is assumed.<p>
     *
     * Note: this constructor calls setPara() internally.
     *
     * @param text an array containing the paragraph of text to process.
     * @param textStart the index into the text array of the start of the
     *        paragraph.
     * @param embeddings an array containing embedding values for each character
     *        in the paragraph. This can be null, in which case it is assumed
     *        that there is no external embedding information.
     * @param embStart the index into the embedding array of the start of the
     *        paragraph.
     * @param paragraphLength the length of the paragraph in the text and
     *        embeddings arrays.
     * @param flags a collection of flags that control the algorithm. The
     *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
     *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
     *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
3071 * 3072 * @throws IllegalArgumentException if the values in embeddings are 3073 * not within the allowed range 3074 * 3075 * @see #DIRECTION_LEFT_TO_RIGHT 3076 * @see #DIRECTION_RIGHT_TO_LEFT 3077 * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT 3078 * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT 3079 * @stable ICU 3.8 3080 */ 3081 public BidiBase(char[] text, 3082 int textStart, 3083 byte[] embeddings, 3084 int embStart, 3085 int paragraphLength, 3086 int flags) 3087 { 3088 this(0, 0); 3089 byte paraLvl; 3090 switch (flags) { 3091 case Bidi.DIRECTION_LEFT_TO_RIGHT: 3092 default: 3093 paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT; 3094 break; 3095 case Bidi.DIRECTION_RIGHT_TO_LEFT: 3096 paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT; 3097 break; 3098 case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT: 3099 paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; 3100 break; 3101 case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT: 3102 paraLvl = INTERNAL_LEVEL_DEFAULT_RTL; 3103 break; 3104 } 3105 byte[] paraEmbeddings; 3106 if (embeddings == null) { 3107 paraEmbeddings = null; 3108 } else { 3109 paraEmbeddings = new byte[paragraphLength]; 3110 byte lev; 3111 for (int i = 0; i < paragraphLength; i++) { 3112 lev = embeddings[i + embStart]; 3113 if (lev < 0) { 3114 lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE); 3115 } else if (lev == 0) { 3116 lev = paraLvl; 3117 if (paraLvl > MAX_EXPLICIT_LEVEL) { 3118 lev &= 1; 3119 } 3120 } 3121 paraEmbeddings[i] = lev; 3122 } 3123 } 3124 if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { 3125 setPara(text, paraLvl, paraEmbeddings); 3126 } else { 3127 char[] paraText = new char[paragraphLength]; 3128 System.arraycopy(text, textStart, paraText, 0, paragraphLength); 3129 setPara(paraText, paraLvl, paraEmbeddings); 3130 } 3131 } 3132 3133 /** 3134 * Return true if the line is not left-to-right or right-to-left. This means 3135 * it either has mixed runs of left-to-right and right-to-left text, or the 3136 * base direction differs from the direction of the only run of text. 
3137 * 3138 * @return true if the line is not left-to-right or right-to-left. 3139 * 3140 * @throws IllegalStateException if this call is not preceded by a successful 3141 * call to <code>setPara</code> 3142 * @stable ICU 3.8 3143 */ 3144 public boolean isMixed() 3145 { 3146 return (!isLeftToRight() && !isRightToLeft()); 3147 } 3148 3149 /** 3150 * Return true if the line is all left-to-right text and the base direction 3151 * is left-to-right. 3152 * 3153 * @return true if the line is all left-to-right text and the base direction 3154 * is left-to-right. 3155 * 3156 * @throws IllegalStateException if this call is not preceded by a successful 3157 * call to <code>setPara</code> 3158 * @stable ICU 3.8 3159 */ 3160 public boolean isLeftToRight() 3161 { 3162 return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0); 3163 } 3164 3165 /** 3166 * Return true if the line is all right-to-left text, and the base direction 3167 * is right-to-left 3168 * 3169 * @return true if the line is all right-to-left text, and the base 3170 * direction is right-to-left 3171 * 3172 * @throws IllegalStateException if this call is not preceded by a successful 3173 * call to <code>setPara</code> 3174 * @stable ICU 3.8 3175 */ 3176 public boolean isRightToLeft() 3177 { 3178 return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1); 3179 } 3180 3181 /** 3182 * Return true if the base direction is left-to-right 3183 * 3184 * @return true if the base direction is left-to-right 3185 * 3186 * @throws IllegalStateException if this call is not preceded by a successful 3187 * call to <code>setPara</code> or <code>setLine</code> 3188 * 3189 * @stable ICU 3.8 3190 */ 3191 public boolean baseIsLeftToRight() 3192 { 3193 return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT); 3194 } 3195 3196 /** 3197 * Return the base level (0 if left-to-right, 1 if right-to-left). 
3198 * 3199 * @return the base level 3200 * 3201 * @throws IllegalStateException if this call is not preceded by a successful 3202 * call to <code>setPara</code> or <code>setLine</code> 3203 * 3204 * @stable ICU 3.8 3205 */ 3206 public int getBaseLevel() 3207 { 3208 return getParaLevel(); 3209 } 3210 3211 /** 3212 * Compute the logical to visual run mapping 3213 */ 3214 private void getLogicalToVisualRunsMap() 3215 { 3216 if (isGoodLogicalToVisualRunsMap) { 3217 return; 3218 } 3219 int count = countRuns(); 3220 if ((logicalToVisualRunsMap == null) || 3221 (logicalToVisualRunsMap.length < count)) { 3222 logicalToVisualRunsMap = new int[count]; 3223 } 3224 int i; 3225 long[] keys = new long[count]; 3226 for (i = 0; i < count; i++) { 3227 keys[i] = ((long)(runs[i].start)<<32) + i; 3228 } 3229 Arrays.sort(keys); 3230 for (i = 0; i < count; i++) { 3231 logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF); 3232 } 3233 keys = null; 3234 isGoodLogicalToVisualRunsMap = true; 3235 } 3236 3237 /** 3238 * Return the level of the nth logical run in this line. 3239 * 3240 * @param run the index of the run, between 0 and <code>countRuns()-1</code> 3241 * 3242 * @return the level of the run 3243 * 3244 * @throws IllegalStateException if this call is not preceded by a successful 3245 * call to <code>setPara</code> or <code>setLine</code> 3246 * @throws IllegalArgumentException if <code>run</code> is not in 3247 * the range <code>0<=run<countRuns()</code> 3248 * @stable ICU 3.8 3249 */ 3250 public int getRunLevel(int run) 3251 { 3252 verifyValidParaOrLine(); 3253 BidiLine.getRuns(this); 3254 if (runCount == 1) { 3255 return getParaLevel(); 3256 } 3257 verifyIndex(run, 0, runCount); 3258 getLogicalToVisualRunsMap(); 3259 return runs[logicalToVisualRunsMap[run]].level; 3260 } 3261 3262 /** 3263 * Return the index of the character at the start of the nth logical run in 3264 * this line, as an offset from the start of the line. 
3265 * 3266 * @param run the index of the run, between 0 and <code>countRuns()</code> 3267 * 3268 * @return the start of the run 3269 * 3270 * @throws IllegalStateException if this call is not preceded by a successful 3271 * call to <code>setPara</code> or <code>setLine</code> 3272 * @throws IllegalArgumentException if <code>run</code> is not in 3273 * the range <code>0<=run<countRuns()</code> 3274 * @stable ICU 3.8 3275 */ 3276 public int getRunStart(int run) 3277 { 3278 verifyValidParaOrLine(); 3279 BidiLine.getRuns(this); 3280 if (runCount == 1) { 3281 return 0; 3282 } else if (run == runCount) { 3283 return length; 3284 } 3285 verifyIndex(run, 0, runCount); 3286 getLogicalToVisualRunsMap(); 3287 return runs[logicalToVisualRunsMap[run]].start; 3288 } 3289 3290 /** 3291 * Return the index of the character past the end of the nth logical run in 3292 * this line, as an offset from the start of the line. For example, this 3293 * will return the length of the line for the last run on the line. 3294 * 3295 * @param run the index of the run, between 0 and <code>countRuns()</code> 3296 * 3297 * @return the limit of the run 3298 * 3299 * @throws IllegalStateException if this call is not preceded by a successful 3300 * call to <code>setPara</code> or <code>setLine</code> 3301 * @throws IllegalArgumentException if <code>run</code> is not in 3302 * the range <code>0<=run<countRuns()</code> 3303 * @stable ICU 3.8 3304 */ 3305 public int getRunLimit(int run) 3306 { 3307 verifyValidParaOrLine(); 3308 BidiLine.getRuns(this); 3309 if (runCount == 1) { 3310 return length; 3311 } 3312 verifyIndex(run, 0, runCount); 3313 getLogicalToVisualRunsMap(); 3314 int idx = logicalToVisualRunsMap[run]; 3315 int len = idx == 0 ? runs[idx].limit : 3316 runs[idx].limit - runs[idx-1].limit; 3317 return runs[idx].start + len; 3318 } 3319 3320 /** 3321 * Return true if the specified text requires bidi analysis. If this returns 3322 * false, the text will display left-to-right. 
Clients can then avoid 3323 * constructing a Bidi object. Text in the Arabic Presentation Forms area of 3324 * Unicode is presumed to already be shaped and ordered for display, and so 3325 * will not cause this method to return true. 3326 * 3327 * @param text the text containing the characters to test 3328 * @param start the start of the range of characters to test 3329 * @param limit the limit of the range of characters to test 3330 * 3331 * @return true if the range of characters requires bidi analysis 3332 * 3333 * @stable ICU 3.8 3334 */ 3335 public static boolean requiresBidi(char[] text, 3336 int start, 3337 int limit) 3338 { 3339 final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT | 3340 1 << AL | 3341 1 << RLE | 3342 1 << RLO | 3343 1 << AN); 3344 3345 if (0 > start || start > limit || limit > text.length) { 3346 throw new IllegalArgumentException("Value start " + start + 3347 " is out of range 0 to " + limit); 3348 } 3349 for (int i = start; i < limit; ++i) { 3350 if (Character.isHighSurrogate(text[i]) && i < (limit-1) && 3351 Character.isLowSurrogate(text[i+1])) { 3352 if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) { 3353 return true; 3354 } 3355 } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) { 3356 return true; 3357 } 3358 } 3359 return false; 3360 } 3361 3362 /** 3363 * Reorder the objects in the array into visual order based on their levels. 3364 * This is a utility method to use when you have a collection of objects 3365 * representing runs of text in logical order, each run containing text at a 3366 * single level. The elements at <code>index</code> from 3367 * <code>objectStart</code> up to <code>objectStart + count</code> in the 3368 * objects array will be reordered into visual order assuming 3369 * each run of text has the level indicated by the corresponding element in 3370 * the levels array (at <code>index - objectStart + levelStart</code>). 
3371 * 3372 * @param levels an array representing the bidi level of each object 3373 * @param levelStart the start position in the levels array 3374 * @param objects the array of objects to be reordered into visual order 3375 * @param objectStart the start position in the objects array 3376 * @param count the number of objects to reorder 3377 * @stable ICU 3.8 3378 */ 3379 public static void reorderVisually(byte[] levels, 3380 int levelStart, 3381 Object[] objects, 3382 int objectStart, 3383 int count) 3384 { 3385 if (0 > levelStart || levels.length <= levelStart) { 3386 throw new IllegalArgumentException("Value levelStart " + 3387 levelStart + " is out of range 0 to " + 3388 (levels.length-1)); 3389 } 3390 if (0 > objectStart || objects.length <= objectStart) { 3391 throw new IllegalArgumentException("Value objectStart " + 3392 levelStart + " is out of range 0 to " + 3393 (objects.length-1)); 3394 } 3395 if (0 > count || objects.length < (objectStart+count)) { 3396 throw new IllegalArgumentException("Value count " + 3397 levelStart + " is out of range 0 to " + 3398 (objects.length - objectStart)); 3399 } 3400 byte[] reorderLevels = new byte[count]; 3401 System.arraycopy(levels, levelStart, reorderLevels, 0, count); 3402 int[] indexMap = reorderVisual(reorderLevels); 3403 Object[] temp = new Object[count]; 3404 System.arraycopy(objects, objectStart, temp, 0, count); 3405 for (int i = 0; i < count; ++i) { 3406 objects[objectStart + i] = temp[indexMap[i]]; 3407 } 3408 } 3409 3410 /** 3411 * Display the bidi internal state, used in debugging. 
3412 */ 3413 public String toString() { 3414 StringBuffer buf = new StringBuffer(super.toString()); 3415 3416 buf.append("[dir: " + direction); 3417 buf.append(" baselevel: " + paraLevel); 3418 buf.append(" length: " + length); 3419 buf.append(" runs: "); 3420 if (levels == null) { 3421 buf.append("null"); 3422 } else { 3423 buf.append('['); 3424 buf.append(levels[0]); 3425 for (int i = 0; i < levels.length; i++) { 3426 buf.append(' '); 3427 buf.append(levels[i]); 3428 } 3429 buf.append(']'); 3430 } 3431 buf.append(" text: [0x"); 3432 buf.append(Integer.toHexString(text[0])); 3433 for (int i = 0; i < text.length; i++) { 3434 buf.append(" 0x"); 3435 buf.append(Integer.toHexString(text[i])); 3436 } 3437 buf.append(']'); 3438 buf.append(']'); 3439 3440 return buf.toString(); 3441 } 3442 3443 /** 3444 * A class that provides access to constants defined by 3445 * java.awt.font.TextAttribute without creating a static dependency. 3446 */ 3447 private static class TextAttributeConstants { 3448 private static final Class<?> clazz = getClass("java.awt.font.TextAttribute"); 3449 3450 /** 3451 * TextAttribute instances (or a fake Attribute type if 3452 * java.awt.font.TextAttribute is not present) 3453 */ 3454 static final AttributedCharacterIterator.Attribute RUN_DIRECTION = 3455 getTextAttribute("RUN_DIRECTION"); 3456 static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING = 3457 getTextAttribute("NUMERIC_SHAPING"); 3458 static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING = 3459 getTextAttribute("BIDI_EMBEDDING"); 3460 3461 /** 3462 * TextAttribute.RUN_DIRECTION_LTR 3463 */ 3464 static final Boolean RUN_DIRECTION_LTR = (clazz == null) ? 
3465 Boolean.FALSE : (Boolean)getStaticField(clazz, "RUN_DIRECTION_LTR"); 3466 3467 3468 private static Class<?> getClass(String name) { 3469 try { 3470 return Class.forName(name, true, null); 3471 } catch (ClassNotFoundException e) { 3472 return null; 3473 } 3474 } 3475 3476 private static Object getStaticField(Class<?> clazz, String name) { 3477 try { 3478 Field f = clazz.getField(name); 3479 return f.get(null); 3480 } catch (NoSuchFieldException | IllegalAccessException x) { 3481 throw new AssertionError(x); 3482 } 3483 } 3484 3485 private static AttributedCharacterIterator.Attribute 3486 getTextAttribute(String name) 3487 { 3488 if (clazz == null) { 3489 // fake attribute 3490 return new AttributedCharacterIterator.Attribute(name) { }; 3491 } else { 3492 return (AttributedCharacterIterator.Attribute)getStaticField(clazz, name); 3493 } 3494 } 3495 } 3496 3497 /** 3498 * A class that provides access to java.awt.font.NumericShaping without 3499 * creating a static dependency. 3500 */ 3501 private static class NumericShapings { 3502 private static final Class<?> clazz = 3503 getClass("java.awt.font.NumericShaper"); 3504 private static final Method shapeMethod = 3505 getMethod(clazz, "shape", char[].class, int.class, int.class); 3506 3507 private static Class<?> getClass(String name) { 3508 try { 3509 return Class.forName(name, true, null); 3510 } catch (ClassNotFoundException e) { 3511 return null; 3512 } 3513 } 3514 3515 private static Method getMethod(Class<?> clazz, 3516 String name, 3517 Class<?>... paramTypes) 3518 { 3519 if (clazz != null) { 3520 try { 3521 return clazz.getMethod(name, paramTypes); 3522 } catch (NoSuchMethodException e) { 3523 throw new AssertionError(e); 3524 } 3525 } else { 3526 return null; 3527 } 3528 } 3529 3530 /** 3531 * Invokes NumericShaping shape(text,start,count) method. 
3532 */ 3533 static void shape(Object shaper, char[] text, int start, int count) { 3534 if (shapeMethod == null) 3535 throw new AssertionError("Should not get here"); 3536 try { 3537 shapeMethod.invoke(shaper, text, start, count); 3538 } catch (InvocationTargetException e) { 3539 Throwable cause = e.getCause(); 3540 if (cause instanceof RuntimeException) 3541 throw (RuntimeException)cause; 3542 throw new AssertionError(e); 3543 } catch (IllegalAccessException iae) { 3544 throw new AssertionError(iae); 3545 } 3546 } 3547 } 3548 }